diff --git a/.gitattributes b/.gitattributes index b3623c426e7..6bbad541ac6 100644 --- a/.gitattributes +++ b/.gitattributes @@ -18,7 +18,6 @@ .github/ export-ignore .idea/ export-ignore .readthedocs.yml export-ignore -.travis.yml export-ignore _config.yml export-ignore codecov.yml export-ignore licenses-binary/ export-ignore diff --git a/.github/ISSUE_TEMPLATE/dependency.yml b/.github/ISSUE_TEMPLATE/dependency.yml new file mode 100644 index 00000000000..e71c7d1c64a --- /dev/null +++ b/.github/ISSUE_TEMPLATE/dependency.yml @@ -0,0 +1,109 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# See https://gh-community.github.io/issue-template-feedback/structured/ + +name: Dependency +title: ":arrow_up: Upgrade from to " +description: Keep upstream dependencies fresh and stable +labels: [ "kind:build, priority:major, good first issue, help wanted" ] +body: + - type: markdown + attributes: + value: | + Thank you for finding the time to report the issue! We really appreciate the community's efforts to improve Kyuubi. + + It doesn't really matter whether what you are reporting is a bug or not, just feel free to share the problem you have + encountered with the community. For best practices, if it is indeed a bug, please try your best to provide the reproducible + steps. If you want to ask questions or share ideas, please [subscribe to our mailing list](mailto:dev-subscribe@kyuubi.apache.org) + and send emails to [our mailing list](mailto:dev@kyuubi.apache.org), you can also head to our + [Discussions](https://github.com/apache/kyuubi/discussions) tab. + + - type: checkboxes + attributes: + label: Code of Conduct + description: The Code of Conduct helps create a safe space for everyone. We require that everyone agrees to it. + options: + - label: > + I agree to follow this project's [Code of Conduct](https://www.apache.org/foundation/policies/conduct) + required: true + + - type: checkboxes + attributes: + label: Search before asking + options: + - label: > + I have searched in the [issues](https://github.com/apache/kyuubi/issues?q=is%3Aissue) and found no similar + issues. + required: true + + - type: dropdown + id: priority + attributes: + label: Why do we need to upgrade this artifact? + options: + - Common Vulnerabilities and Exposures (CVE) + - Bugfixes + - Usage of New Features + - Performance Improvements + - Regular Updates + validations: + required: true + + - type: input + id: artifact + attributes: + label: Artifact Name + description: Which artifact shall be upgraded? + placeholder: e.g. spark-sql + value: https://mvnrepository.com/search?q= + validations: + required: true + + - type: input + id: versions + attributes: + label: Target Version + description: Which version shall be upgraded? + placeholder: e.g. 1.2.1 + validations: + required: true + + - type: textarea + id: changes + attributes: + label: Notable Changes + description: Please provide notable changes, or release notes if any + validations: + required: false + + - type: checkboxes + attributes: + label: Are you willing to submit PR? + description: > + A pull request is optional, but we are glad to help you in the contribution process + especially if you already know a good understanding of how to implement the fix. + Kyuubi is a community-driven project and we love to bring new contributors in. + options: + - label: Yes. I would be willing to submit a PR with guidance from the Kyuubi community to fix. + - label: No. I cannot submit a PR at this time. + + - type: markdown + attributes: + value: > + After changing the corresponding dependency version and before submitting your pull request, + it is necessary to execute `build/dependency.sh --replace` locally to update `dev/dependencyList`. diff --git a/.github/PULL_REQUEST_TEMPLATE b/.github/PULL_REQUEST_TEMPLATE index 6e37b26c46f..bdb71f30fb1 100644 --- a/.github/PULL_REQUEST_TEMPLATE +++ b/.github/PULL_REQUEST_TEMPLATE @@ -20,4 +20,4 @@ Please clarify why the changes are needed. For instance, - [ ] Add screenshots for manual tests if appropriate -- [ ] [Run test](https://kyuubi.apache.org/docs/latest/develop_tools/testing.html#running-tests) locally before make a pull request +- [ ] [Run test](https://kyuubi.readthedocs.io/en/master/develop_tools/testing.html#running-tests) locally before make a pull request diff --git a/.github/actions/cache-engine-archives/action.yaml b/.github/actions/cache-engine-archives/action.yaml new file mode 100644 index 00000000000..86a9ccafb95 --- /dev/null +++ b/.github/actions/cache-engine-archives/action.yaml @@ -0,0 +1,27 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +name: cache-engine-archives +description: 'Cache download engine archives from Apache Archives website used by Maven download plugin' +runs: + using: composite + steps: + - name: Cache Engine Archives + uses: actions/cache@v3 + with: + path: /tmp/engine-archives + key: engine-archives diff --git a/.github/actions/setup-mvnd/action.yaml b/.github/actions/setup-mvnd/action.yaml new file mode 100644 index 00000000000..dac05c02479 --- /dev/null +++ b/.github/actions/setup-mvnd/action.yaml @@ -0,0 +1,35 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +name: 'setup-mvnd' +description: 'Setup Maven and Mvnd' +runs: + using: composite + steps: + - name: Cache Mvnd + uses: actions/cache@v3 + with: + path: | + build/maven-mvnd-* + build/apache-maven-* + key: setup-mvnd-${{ runner.os }} + - name: Check Maven + run: build/mvn -v + shell: bash + - name: Check Mvnd + run: build/mvnd -v || true + shell: bash diff --git a/.github/labeler.yml b/.github/labeler.yml index a9f79a5374d..ecec1253274 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -45,7 +45,6 @@ - ".gitattributes" - ".github/**/*" - ".gitignore" - - ".travis.yml" - "LICENSE" - "LICENSE-binary" - "NOTICE" @@ -103,7 +102,8 @@ "module:server": - "bin/kyuubi" - - "kyuubi-server/**/*" + - "kyuubi-server/src/**/*" + - "kyuubi-server/pom.xml" - "extension/server/kyuubi-server-plugin/**/*" "module:spark": @@ -122,3 +122,6 @@ "module:authz": - "extensions/spark/kyuubi-spark-authz/**/*" + +"module:ui": + - "kyuubi-server/web-ui/**/*" diff --git a/.github/workflows/dep.yml b/.github/workflows/dep.yml index 5ea4447cc47..09197951a12 100644 --- a/.github/workflows/dep.yml +++ b/.github/workflows/dep.yml @@ -23,11 +23,13 @@ on: - master - branch-* paths: - # dependency check happens only pom changes + # when pom or dependency workflow changes - '**/pom.xml' + - '.github/workflows/dep.yml' + - .github/actions/setup-mvnd/*.yaml concurrency: - group: dep-${{ github.ref }} + group: dep-${{ github.head_ref || github.run_id }} cancel-in-progress: true jobs: @@ -43,11 +45,22 @@ jobs: java-version: 8 cache: 'maven' check-latest: false + - name: Setup Maven and Mvnd + uses: ./.github/actions/setup-mvnd + - name: Check kyuubi modules available + id: modules-check + run: >- + build/mvnd dependency:resolve validate -q + -DincludeGroupIds="org.apache.kyuubi" -DincludeScope="compile" + -Pfast -Denforcer.skip=false + -pl kyuubi-ctl,kyuubi-server,kyuubi-assembly -am + continue-on-error: true - name: build env: MAVEN_OPTS: -Dorg.slf4j.simpleLogger.defaultLogLevel=error + if: steps.modules-check.conclusion == 'success' && steps.modules-check.outcome == 'failure' run: >- - build/mvn clean install + build/mvnd clean install -Pflink-provided,spark-provided,hive-provided -Dmaven.javadoc.skip=true -Drat.skip=true @@ -57,3 +70,7 @@ jobs: -pl kyuubi-ctl,kyuubi-server,kyuubi-assembly -am - name: Check dependency list run: build/dependency.sh + - name: Dependency Review + uses: actions/dependency-review-action@v3 + with: + fail-on-severity: moderate diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml new file mode 100644 index 00000000000..55cb6b8b16b --- /dev/null +++ b/.github/workflows/docs.yml @@ -0,0 +1,48 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +name: Docs + +on: + pull_request: + branches: + - master + +concurrency: + group: docs-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + +jobs: + sphinx: + name: sphinx-build + runs-on: ubuntu-22.04 + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: '3.9' + cache: 'pip' + cache-dependency-path: docs/requirements.txt + - run: pip install -r docs/requirements.txt + - name: make html + run: make -d --directory docs html + - name: upload html + uses: actions/upload-artifact@v3 + with: + path: | + docs/_build/html/ + !docs/_build/html/_sources/ diff --git a/.github/workflows/greetings.yml b/.github/workflows/greetings.yml index 43c7a5585a7..77fc14e8078 100644 --- a/.github/workflows/greetings.yml +++ b/.github/workflows/greetings.yml @@ -22,7 +22,7 @@ on: issues jobs: greeting: - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 permissions: issues: write pull-requests: write diff --git a/.github/workflows/labeler.yml b/.github/workflows/labeler.yml index eb5d898e900..c4cad7aef2d 100644 --- a/.github/workflows/labeler.yml +++ b/.github/workflows/labeler.yml @@ -26,7 +26,7 @@ permissions: jobs: triage: - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 steps: - uses: actions/labeler@v4 with: diff --git a/.github/workflows/license.yml b/.github/workflows/license.yml index 73ef05864a2..e62605e7f09 100644 --- a/.github/workflows/license.yml +++ b/.github/workflows/license.yml @@ -26,7 +26,7 @@ on: - branch-* concurrency: - group: lincense-${{ github.ref }} + group: license-${{ github.head_ref || github.run_id }} cancel-in-progress: true jobs: @@ -42,8 +42,10 @@ jobs: java-version: 8 cache: 'maven' check-latest: false + - name: Setup Maven and Mvnd + uses: ./.github/actions/setup-mvnd - run: >- - build/mvn org.apache.rat:apache-rat-plugin:check + build/mvnd org.apache.rat:apache-rat-plugin:check -Ptpcds -Pspark-block-cleaner -Pkubernetes-it -Pspark-3.1 -Pspark-3.2 -Pspark-3.3 - name: Upload rat report diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 6bb2658efa1..b8b3f7072ac 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -28,11 +28,13 @@ on: - branch-* concurrency: - group: test-${{ github.ref }} + group: test-${{ github.head_ref || github.run_id }} cancel-in-progress: true env: - MVN_OPT: -Dmaven.javadoc.skip=true -Drat.skip=true -Dscalastyle.skip=true -Dspotless.check.skip -Dorg.slf4j.simpleLogger.defaultLogLevel=warn -Pjdbc-shaded + MVN_OPT: -Dmaven.javadoc.skip=true -Drat.skip=true -Dscalastyle.skip=true -Dspotless.check.skip -Dorg.slf4j.simpleLogger.defaultLogLevel=warn -Pjdbc-shaded -Dmaven.plugin.download.cache.path=/tmp/engine-archives + KUBERNETES_VERSION: v1.26.1 + MINIKUBE_VERSION: v1.29.0 jobs: default: @@ -75,10 +77,14 @@ jobs: java-version: ${{ matrix.java }} cache: 'maven' check-latest: false + - name: Setup Maven and Mvnd + uses: ./.github/actions/setup-mvnd + - name: Cache Engine Archives + uses: ./.github/actions/cache-engine-archives - name: Setup Python uses: actions/setup-python@v4 with: - python-version: '3.9' + python-version: '3.9' - name: Build and test Kyuubi and Spark with maven w/o linters run: | TEST_MODULES="dev/kyuubi-codecov" @@ -100,6 +106,7 @@ jobs: path: | **/target/unit-tests.log **/kyuubi-spark-sql-engine.log* + **/kyuubi-spark-batch-submit.log* authz: name: Kyuubi-AuthZ and Spark Test @@ -126,6 +133,10 @@ jobs: java-version: ${{ matrix.java }} cache: 'maven' check-latest: false + - name: Setup Maven and Mvnd + uses: ./.github/actions/setup-mvnd + - name: Cache Engine Archives + uses: ./.github/actions/cache-engine-archives - name: Build and test Kyuubi AuthZ with supported Spark versions run: | TEST_MODULES="extensions/spark/kyuubi-spark-authz" @@ -150,20 +161,15 @@ jobs: - 8 - 11 flink: - - '1.14' - '1.15' - '1.16' flink-archive: [ "" ] comment: [ "normal" ] include: - java: 8 - flink: '1.15' - flink-archive: '-Dflink.archive.mirror=https://archive.apache.org/dist/flink/flink-1.14.5 -Dflink.archive.name=flink-1.14.5-bin-scala_2.12.tgz' - comment: 'verify-on-flink-1.14-binary' - - java: 8 - flink: '1.15' - flink-archive: '-Dflink.archive.mirror=https://archive.apache.org/dist/flink/flink-1.16.0 -Dflink.archive.name=flink-1.16.0-bin-scala_2.12.tgz' - comment: 'verify-on-flink-1.16-binary' + flink: '1.16' + flink-archive: '-Dflink.archive.mirror=https://archive.apache.org/dist/flink/flink-1.15.4 -Dflink.archive.name=flink-1.15.4-bin-scala_2.12.tgz' + comment: 'verify-on-flink-1.15-binary' steps: - uses: actions/checkout@v3 - name: Tune Runner VM @@ -175,6 +181,10 @@ jobs: java-version: ${{ matrix.java }} cache: 'maven' check-latest: false + - name: Setup Maven and Mvnd + uses: ./.github/actions/setup-mvnd + - name: Cache Engine Archives + uses: ./.github/actions/cache-engine-archives - name: Build Flink with maven w/o linters run: | TEST_MODULES="externals/kyuubi-flink-sql-engine,integration-tests/kyuubi-flink-it" @@ -219,6 +229,10 @@ jobs: java-version: ${{ matrix.java }} cache: 'maven' check-latest: false + - name: Setup Maven and Mvnd + uses: ./.github/actions/setup-mvnd + - name: Cache Engine Archives + uses: ./.github/actions/cache-engine-archives - name: Build and test Hive with maven w/o linters run: | TEST_MODULES="externals/kyuubi-hive-sql-engine,integration-tests/kyuubi-hive-it" @@ -254,6 +268,10 @@ jobs: java-version: ${{ matrix.java }} cache: 'maven' check-latest: false + - name: Setup Maven and Mvnd + uses: ./.github/actions/setup-mvnd + - name: Cache Engine Archives + uses: ./.github/actions/cache-engine-archives - name: Build and test JDBC with maven w/o linters run: | TEST_MODULES="externals/kyuubi-jdbc-engine,integration-tests/kyuubi-jdbc-it" @@ -289,11 +307,15 @@ jobs: java-version: ${{ matrix.java }} cache: 'maven' check-latest: false + - name: Setup Maven and Mvnd + uses: ./.github/actions/setup-mvnd + - name: Cache Engine Archives + uses: ./.github/actions/cache-engine-archives - name: Build and test Trino with maven w/o linters run: | - TEST_MODULES="externals/kyuubi-trino-engine,integration-tests/kyuubi-trino-it" - ./build/mvn ${MVN_OPT} -pl ${TEST_MODULES} -am clean install -DskipTests - ./build/mvn ${MVN_OPT} -pl ${TEST_MODULES} test + TEST_MODULES="kyuubi-server,externals/kyuubi-trino-engine,externals/kyuubi-spark-sql-engine,externals/kyuubi-download,integration-tests/kyuubi-trino-it" + ./build/mvn ${MVN_OPT} -pl ${TEST_MODULES} -am -Pflink-provided -Phive-provided clean install -DskipTests + ./build/mvn -Dmaven.javadoc.skip=true -Drat.skip=true -Dscalastyle.skip=true -Dspotless.check.skip -pl ${TEST_MODULES} -am -Pflink-provided -Phive-provided test -Dtest=none -DwildcardSuites=org.apache.kyuubi.it.trino.operation.TrinoOperationSuite,org.apache.kyuubi.it.trino.server.TrinoFrontendSuite - name: Upload test logs if: failure() uses: actions/upload-artifact@v3 @@ -319,6 +341,10 @@ jobs: java-version: 8 cache: 'maven' check-latest: false + - name: Setup Maven and Mvnd + uses: ./.github/actions/setup-mvnd + - name: Cache Engine Archives + uses: ./.github/actions/cache-engine-archives - name: Run TPC-DS Tests run: | TEST_MODULES="kyuubi-server,extensions/spark/kyuubi-spark-connector-tpcds,extensions/spark/kyuubi-spark-connector-tpch" @@ -347,15 +373,19 @@ jobs: file: build/Dockerfile load: true tags: apache/kyuubi:latest - # from https://github.com/marketplace/actions/setup-minikube-kubernetes-cluster + - name: Cache Engine Archives + uses: ./.github/actions/cache-engine-archives - name: Setup Minikube - uses: manusa/actions-setup-minikube@v2.7.2 - with: - minikube version: 'v1.28.0' - kubernetes version: 'v1.25.4' - github token: ${{ secrets.GITHUB_TOKEN }} + run: | + # https://minikube.sigs.k8s.io/docs/start/ + curl -LO https://github.com/kubernetes/minikube/releases/download/${MINIKUBE_VERSION}/minikube-linux-amd64 + sudo install minikube-linux-amd64 /usr/local/bin/minikube + minikube start --cpus 2 --memory 4096 --kubernetes-version=${KUBERNETES_VERSION} --force + # https://minikube.sigs.k8s.io/docs/handbook/pushing/#7-loading-directly-to-in-cluster-container-runtime + minikube image load apache/kyuubi:latest - name: kubectl pre-check run: | + kubectl get nodes kubectl get serviceaccount kubectl create serviceaccount kyuubi kubectl create clusterrolebinding kyuubi-role --clusterrole=edit --serviceaccount=default:kyuubi @@ -394,14 +424,14 @@ jobs: steps: - name: Checkout uses: actions/checkout@v3 - # from https://github.com/marketplace/actions/setup-minikube-kubernetes-cluster + - name: Cache Engine Archives + uses: ./.github/actions/cache-engine-archives - name: Setup Minikube - uses: manusa/actions-setup-minikube@v2.7.2 - with: - minikube version: 'v1.25.2' - kubernetes version: 'v1.23.3' - driver: docker - start args: '--extra-config=kubeadm.ignore-preflight-errors=NumCPU --force --cpus 2 --memory 4096' + run: | + # https://minikube.sigs.k8s.io/docs/start/ + curl -LO https://github.com/kubernetes/minikube/releases/download/${MINIKUBE_VERSION}/minikube-linux-amd64 + sudo install minikube-linux-amd64 /usr/local/bin/minikube + minikube start --cpus 2 --memory 4096 --kubernetes-version=${KUBERNETES_VERSION} --force # in case: https://spark.apache.org/docs/latest/running-on-kubernetes.html#rbac - name: Create Service Account run: | @@ -413,7 +443,6 @@ jobs: run: >- ./build/mvn ${MVN_OPT} clean install -Pflink-provided,hive-provided - -Pspark-3.2 -Pkubernetes-it -Dtest=none -DwildcardSuites=org.apache.kyuubi.kubernetes.test.spark - name: Print Driver Pod logs @@ -451,6 +480,10 @@ jobs: java-version: ${{ matrix.java }} cache: 'maven' check-latest: false + - name: Setup Maven and Mvnd + uses: ./.github/actions/setup-mvnd + - name: Cache Engine Archives + uses: ./.github/actions/cache-engine-archives - name: zookeeper integration tests run: | export KYUUBI_IT_ZOOKEEPER_VERSION=${{ matrix.zookeeper }} diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 149da6d82b3..b53a7d29294 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -43,6 +43,8 @@ jobs: java-version: 8 cache: 'maven' check-latest: false + - name: Setup Maven and Mvnd + uses: ./.github/actions/setup-mvnd - name: Build with Maven run: ./build/mvn clean install ${{ matrix.profiles }} -Dmaven.javadoc.skip=true -V - name: Upload test logs diff --git a/.github/workflows/docker-image.yml b/.github/workflows/publish-snapshot-docker.yml similarity index 60% rename from .github/workflows/docker-image.yml rename to .github/workflows/publish-snapshot-docker.yml index b403e46b53b..3afccee7aa8 100644 --- a/.github/workflows/docker-image.yml +++ b/.github/workflows/publish-snapshot-docker.yml @@ -15,33 +15,37 @@ # limitations under the License. # -name: Publish Docker image +name: Publish Snapshot Docker Image on: - push: - branches: - - master + schedule: + - cron: '0 0 * * *' jobs: push_to_registry: - name: Push Docker image to Docker Hub + name: Push Snapshot Docker Image to Docker Hub if: ${{ startsWith(github.repository, 'apache/') }} runs-on: ubuntu-22.04 - concurrency: - # this group should be global unique - group: push-docker-image - cancel-in-progress: true steps: - name: Checkout uses: actions/checkout@v3 + - name: Set up QEMU + uses: docker/setup-qemu-action@v2 + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v2 - name: Login to Docker Hub uses: docker/login-action@v2 with: username: ${{ secrets.DOCKERHUB_USER }} password: ${{ secrets.DOCKERHUB_TOKEN }} - - name: Build Kyuubi Docker Image - run: docker build --tag apache/kyuubi:master-snapshot --file build/Dockerfile . - - name: Docker image - run: docker images - - name: Push Docker image - run: docker push apache/kyuubi:master-snapshot + - name: Build and Push Kyuubi Docker Image + uses: docker/build-push-action@v4 + with: + # build cache on Github Actions, See: https://docs.docker.com/build/cache/backends/gha/#using-dockerbuild-push-action + cache-from: type=gha + cache-to: type=gha,mode=max + context: . + file: build/Dockerfile + platforms: linux/amd64,linux/arm64 + push: true + tags: apache/kyuubi:master-snapshot diff --git a/.github/workflows/publish-snapshot.yml b/.github/workflows/publish-snapshot-nexus.yml similarity index 69% rename from .github/workflows/publish-snapshot.yml rename to .github/workflows/publish-snapshot-nexus.yml index acd04bfab80..0d4222b044a 100644 --- a/.github/workflows/publish-snapshot.yml +++ b/.github/workflows/publish-snapshot-nexus.yml @@ -15,11 +15,11 @@ # limitations under the License. # -name: Publish Snapshot +name: Publish Snapshot Nexus on: schedule: - - cron: '0 0 * * *' + - cron: '0 0 * * *' jobs: publish-snapshot: @@ -41,19 +41,19 @@ jobs: - branch: branch-1.6 profiles: -Pflink-provided,spark-provided,hive-provided,spark-3.3 steps: - - name: Checkout repository - uses: actions/checkout@v3 - with: - ref: ${{ matrix.branch }} - - name: Setup JDK 8 - uses: actions/setup-java@v3 - with: - distribution: temurin - java-version: 8 - cache: 'maven' - check-latest: false - - name: Publish snapshot - ${{ matrix.branch }} - env: - ASF_USERNAME: ${{ secrets.NEXUS_USER }} - ASF_PASSWORD: ${{ secrets.NEXUS_PW }} - run: ./build/mvn clean deploy -s ./build/release/asf-settings.xml -DskipTests ${{ matrix.profiles }} + - name: Checkout repository + uses: actions/checkout@v3 + with: + ref: ${{ matrix.branch }} + - name: Setup JDK 8 + uses: actions/setup-java@v3 + with: + distribution: temurin + java-version: 8 + cache: 'maven' + check-latest: false + - name: Publish Snapshot Jar to Nexus - ${{ matrix.branch }} + env: + ASF_USERNAME: ${{ secrets.NEXUS_USER }} + ASF_PASSWORD: ${{ secrets.NEXUS_PW }} + run: build/mvn clean deploy -s build/release/asf-settings.xml -DskipTests ${{ matrix.profiles }} diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml index e1dfde6f47f..d189cd205db 100644 --- a/.github/workflows/stale.yml +++ b/.github/workflows/stale.yml @@ -23,11 +23,11 @@ on: jobs: stale: - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 permissions: pull-requests: write steps: - - uses: actions/stale@v6 + - uses: actions/stale@v7 with: repo-token: ${{ secrets.GITHUB_TOKEN }} stale-pr-message: > diff --git a/.github/workflows/style.yml b/.github/workflows/style.yml index c848a2f8c42..2824e597288 100644 --- a/.github/workflows/style.yml +++ b/.github/workflows/style.yml @@ -24,7 +24,7 @@ on: - branch-* concurrency: - group: linter-${{ github.ref }} + group: linter-${{ github.head_ref || github.run_id }} cancel-in-progress: true jobs: @@ -34,10 +34,12 @@ jobs: strategy: matrix: profiles: - - '-Pflink-provided,hive-provided,spark-provided,spark-block-cleaner,spark-3.3,spark-3.2,spark-3.1,tpcds' + - '-Pflink-provided,hive-provided,spark-provided,spark-block-cleaner,spark-3.3,spark-3.2,spark-3.1,tpcds,kubernetes-it' steps: - uses: actions/checkout@v3 + with: + fetch-depth: 0 - name: Setup JDK 8 uses: actions/setup-java@v3 with: @@ -45,14 +47,16 @@ jobs: java-version: 8 cache: 'maven' check-latest: false + - name: Setup Maven and Mvnd + uses: ./.github/actions/setup-mvnd - name: Setup Python 3 uses: actions/setup-python@v4 with: python-version: '3.9' cache: 'pip' - - name: Check kyuubi modules avaliable + - name: Check kyuubi modules available id: modules-check - run: build/mvn dependency:resolve -DincludeGroupIds="org.apache.kyuubi" -DincludeScope="compile" -DexcludeTransitive=true ${{ matrix.profiles }} + run: build/mvnd dependency:resolve -DincludeGroupIds="org.apache.kyuubi" -DincludeScope="compile" -DexcludeTransitive=true -q ${{ matrix.profiles }} continue-on-error: true - name: Install @@ -61,13 +65,13 @@ jobs: if: steps.modules-check.conclusion == 'success' && steps.modules-check.outcome == 'failure' run: | MVN_OPT="-DskipTests -Dorg.slf4j.simpleLogger.defaultLogLevel=warn -Dmaven.javadoc.skip=true -Drat.skip=true -Dscalastyle.skip=true -Dspotless.check.skip" - build/mvn clean install ${MVN_OPT} -Pflink-provided,hive-provided,spark-provided,spark-block-cleaner,spark-3.2,tpcds - build/mvn clean install ${MVN_OPT} -pl extensions/spark/kyuubi-extension-spark-3-1 -Pspark-3.1 - build/mvn clean install ${MVN_OPT} -pl extensions/spark/kyuubi-extension-spark-3-3,extensions/spark/kyuubi-spark-connector-kudu,extensions/spark/kyuubi-spark-connector-hive -Pspark-3.3 + build/mvnd clean install ${MVN_OPT} -Pflink-provided,hive-provided,spark-provided,spark-block-cleaner,spark-3.2,tpcds + build/mvnd clean install ${MVN_OPT} -pl extensions/spark/kyuubi-extension-spark-3-1 -Pspark-3.1 + build/mvnd clean install ${MVN_OPT} -pl extensions/spark/kyuubi-extension-spark-3-3,extensions/spark/kyuubi-spark-connector-kudu,extensions/spark/kyuubi-spark-connector-hive -Pspark-3.3 - name: Scalastyle with maven id: scalastyle-check - run: build/mvn scalastyle:check ${{ matrix.profiles }} + run: build/mvnd scalastyle:check -q ${{ matrix.profiles }} - name: Print scalastyle error report if: failure() && steps.scalastyle-check.outcome != 'success' run: >- @@ -81,7 +85,7 @@ jobs: run: | SPOTLESS_BLACK_VERSION=$(build/mvn help:evaluate -Dexpression=spotless.python.black.version -q -DforceStdout) pip install black==$SPOTLESS_BLACK_VERSION - build/mvn spotless:check ${{ matrix.profiles }} -Pspotless-python + build/mvnd spotless:check -q ${{ matrix.profiles }} -Pspotless-python - name: setup npm uses: actions/setup-node@v3 with: @@ -89,7 +93,7 @@ jobs: - name: Web UI Style with node run: | cd ./kyuubi-server/web-ui - npm install pnpm -g + npm install pnpm@7 -g pnpm install pnpm run lint echo "---------------------------------------Notice------------------------------------" @@ -102,10 +106,32 @@ jobs: echo "---------------------------------------------------------------------------------" shellcheck: - name: Shellcheck + name: Super Linter and Shellcheck runs-on: ubuntu-22.04 steps: - uses: actions/checkout@v3 + - name: Super Linter Checks + uses: github/super-linter/slim@v4 + env: + CREATE_LOG_FILE: true + ERROR_ON_MISSING_EXEC_BIT: true + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + IGNORE_GENERATED_FILES: true + IGNORE_GITIGNORED_FILES: true + LINTER_RULES_PATH: / + LOG_LEVEL: NOTICE + SUPPRESS_POSSUM: true + VALIDATE_BASH_EXEC: true + VALIDATE_ENV: true + VALIDATE_JSONC: true + VALIDATE_POWERSHELL: true + VALIDATE_XML: true + - name: Upload Super Linter logs + if: failure() + uses: actions/upload-artifact@v3 + with: + name: super-linter-log + path: super-linter.log - name: check bin directory uses: ludeeus/action-shellcheck@1.1.0 with: diff --git a/.github/workflows/web-ui.yml b/.github/workflows/web-ui.yml index 08c97cfc9e0..2a48eeaa1ea 100644 --- a/.github/workflows/web-ui.yml +++ b/.github/workflows/web-ui.yml @@ -11,7 +11,7 @@ on: - branch-* concurrency: - group: web-ui-${{ github.ref }} + group: web-ui-${{ github.head_ref || github.run_id }} cancel-in-progress: true jobs: @@ -28,7 +28,7 @@ jobs: - name: npm run coverage & build run: | cd ./kyuubi-server/web-ui - npm install pnpm -g + npm install pnpm@7 -g pnpm install pnpm run coverage pnpm run build diff --git a/.gitignore b/.gitignore index d2c1ba3b7d0..190294d06f3 100644 --- a/.gitignore +++ b/.gitignore @@ -32,11 +32,7 @@ .ensime_lucene .generated-mima* .vscode/ -# The star is required for further !/.idea/ to work, see https://git-scm.com/docs/gitignore -/.idea/* -# Icon for JetBrains Toolbox -!/.idea/icon.png -!/.idea/vcs.xml +.idea/ .idea_modules/ .project .pydevproject @@ -44,6 +40,7 @@ .scala_dependencies .settings build/apache-maven* +build/maven-mvnd* build/release/tmp build/scala* build/test @@ -59,10 +56,9 @@ hs_err_pid* spark-warehouse/ metastore_db derby.log -ldap +rest-audit.log **/dependency-reduced-pom.xml -metrics/report.json -metrics/.report.json.crc +metrics/ /kyuubi-ha/embedded_zookeeper/ embedded_zookeeper/ /externals/kyuubi-spark-sql-engine/operation_logs/ diff --git a/.rat-excludes b/.rat-excludes index 86c38ec9925..7a841cf9c6c 100644 --- a/.rat-excludes +++ b/.rat-excludes @@ -32,6 +32,7 @@ NOTICE* docs/** build/apache-maven-*/** +build/maven-mvnd-*/** build/scala-*/** **/**/operation_logs/**/** **/**/server_operation_logs/**/** @@ -50,6 +51,8 @@ build/scala-*/** **/metadata-store-schema*.sql **/*.derby.sql **/*.mysql.sql +**/node/** +**/web-ui/dist/** **/pnpm-lock.yaml **/node_modules/** **/gen/* diff --git a/.scalafmt.conf b/.scalafmt.conf index 2ccb453ddb6..e682a17f71f 100644 --- a/.scalafmt.conf +++ b/.scalafmt.conf @@ -1,4 +1,4 @@ -version = 3.6.1 +version = 3.7.1 runner.dialect=scala212 project.git=true diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index c09fa9566e4..00000000000 --- a/.travis.yml +++ /dev/null @@ -1,71 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -sudo: required -dist: focal -arch: arm64-graviton2 -group: edge -virt: vm -env: SPARK_LOCAL_IP=localhost - -branches: - only: - - master - -language: java - -matrix: - include: - - name: Build Kyuubi common on Linux ARM64 - script: - - ./build/mvn test $MVN_ARGS -pl kyuubi-common,kyuubi-zookeeper,kyuubi-ha,kyuubi-ctl,kyuubi-metrics,kyuubi-hive-beeline,kyuubi-hive-jdbc,extensions/server/kyuubi-server-plugin -am - - name: Build Kyuubi Flink on Linux ARM64 - script: - - ./build/mvn test $MVN_ARGS -pl externals/kyuubi-flink-sql-engine,integration-tests/kyuubi-flink-it - - name: Build Kyuubi Spark on Linux ARM64 - script: - - ./build/mvn test $MVN_ARGS -pl externals/kyuubi-spark-sql-engine - - ./build/mvn test $MVN_ARGS -pl kyuubi-server -DwildcardSuites=org.apache.kyuubi.operation.KyuubiOperationPerUserSuite - - name: Build Kyuubi Trino on Linux ARM64 - script: - - ./build/mvn test $MVN_ARGS -pl externals/kyuubi-trino-engine,integration-tests/kyuubi-trino-it - - name: Build Kyuubi Hive on Linux ARM64 - script: - - ./build/mvn test $MVN_ARGS -pl externals/kyuubi-hive-sql-engine,integration-tests/kyuubi-hive-it - -cache: - directories: - - $HOME/.m2 - -install: - - sudo apt update - - sudo apt install -y openjdk-8-jdk - - export JAVA_HOME="/usr/lib/jvm/java-8-openjdk-${TRAVIS_CPU_ARCH}" - - export PATH="$JAVA_HOME/bin:/usr/share/maven/bin:$PATH" - - ./build/mvn --version - -before_script: - - export MVN_ARGS="-Dmaven.javadoc.skip=true -Drat.skip=true -Dscalastyle.skip=true -Dspotless.check.skip -V -B -ntp -Dorg.slf4j.simpleLogger.defaultLogLevel=warn -Pjdbc-shaded" - - ./build/mvn clean install -DskipTests $MVN_ARGS - - -after_success: - - echo "Travis exited with ${TRAVIS_TEST_RESULT}" - -after_failure: - - echo "Travis exited with ${TRAVIS_TEST_RESULT}" - - for log in `find * -name "unit-tests.log"`; do echo "=========$log========="; grep "ERROR" $log -A 100 -B 5; done diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 9b6348cd29a..ef28d560e36 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,25 +1,25 @@ +- Licensed to the Apache Software Foundation (ASF) under one or more +- contributor license agreements. See the NOTICE file distributed with +- this work for additional information regarding copyright ownership. +- The ASF licenses this file to You under the Apache License, Version 2.0 +- (the "License"); you may not use this file except in compliance with +- the License. You may obtain a copy of the License at +- +- http://www.apache.org/licenses/LICENSE-2.0 +- +- Unless required by applicable law or agreed to in writing, software +- distributed under the License is distributed on an "AS IS" BASIS, +- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- See the License for the specific language governing permissions and +- limitations under the License. +--> # Contributing to Apache Kyuubi Thanks for your interest in the Apache Kyuubi project. Contributions are welcome and are greatly appreciated! -Every little bit helps, and a credit will always be given. +Every little effort helps, and a credit will always be given. This page provides some orientation and resources we have for you to get involved. It also offers recommendations on getting the best results when engaging with the community. @@ -31,7 +31,7 @@ In the process of using Apache Kyuubi, if you have any questions, suggestions, o - Join the [Mailing Lists](https://kyuubi.apache.org/mailing_lists.html) - the best way to keep up-to-date with the community. - [Issue Tracker](https://kyuubi.apache.org/issue_tracking.html) - tracking bugs, ideas, plans, etc. -- [Github Discussions](https://github.com/apache/kyuubi/discussions) - second to mailing list for anything else you want to share or ask +- [GitHub Discussions](https://github.com/apache/kyuubi/discussions) - second to mailing list for anything else you want to share or ask - [Slack](https://join.slack.com/t/apachekyuubi/shared_invite/zt-1e1qw68g4-yE5HJsVVDin~ABtZISyuxg) - chat with our community User && Developer anytime! ## Contributing Guide @@ -44,8 +44,8 @@ There are many ways to make valuable contributions to the project and community. You can make various types of contributions to Kyuubi, including the following but not limited to, - Answer questions in the [Mailing Lists](https://kyuubi.apache.org/mailing_lists.html) -- [Share your success stories with us](https://github.com/apache/kyuubi/discussions/925) -- Improve Documentation - [![Documentation Status](https://readthedocs.org/projects/kyuubi/badge/?version=latest)](https://kyuubi.apache.org/docs/latest/) +- [Share your success stories with us](https://github.com/apache/kyuubi/discussions/925) +- Improve Documentation - [![Documentation Status](https://readthedocs.org/projects/kyuubi/badge/?version=latest)](https://kyuubi.readthedocs.io/en/master/) - Test latest releases - [![Latest tag](https://img.shields.io/github/v/tag/apache/kyuubi?label=tag)](https://github.com/apache/kyuubi/tags) - Improve test coverage - [![codecov](https://codecov.io/gh/apache/kyuubi/branch/master/graph/badge.svg)](https://codecov.io/gh/apache/kyuubi) - Report bugs and better help developers to reproduce @@ -59,4 +59,5 @@ You can make various types of contributions to Kyuubi, including the following b TBD, please be patient for the surprise. ## IDE Setup Guide + [IntelliJ IDEA Setup Guide](https://kyuubi.readthedocs.io/en/master/develop_tools/idea_setup.html) diff --git a/LICENSE-binary b/LICENSE-binary index e80398a431a..a52ea95fbf0 100644 --- a/LICENSE-binary +++ b/LICENSE-binary @@ -319,9 +319,14 @@ io.swagger.core.v3:swagger-models io.vertx:vertx-core io.vertx:vertx-grpc org.apache.zookeeper:zookeeper +com.squareup.retrofit2:retrofit +com.squareup.okhttp3:okhttp BSD ------------ +org.antlr:antlr-runtime +org.antlr:antlr4-runtime +org.antlr:ST4 jline:jline com.thoughtworks.paranamer:paranamer dk.brics.automaton:automaton @@ -353,6 +358,9 @@ org.codehaus.mojo:animal-sniffer-annotations org.slf4j:slf4j-api org.slf4j:jcl-over-slf4j org.slf4j:jul-over-slf4j +com.theokanning.openai-gpt3-java:api +com.theokanning.openai-gpt3-java:client +com.theokanning.openai-gpt3-java:service kyuubi-server/src/main/resources/org/apache/kyuubi/ui/static/assets/fonts/* kyuubi-server/src/main/resources/org/apache/kyuubi/ui/static/icon.min.css @@ -448,6 +456,8 @@ is auto-generated by `pnpm licenses list --prod`. ├────────────────────────────────────┼──────────────┤ │ csstype │ MIT │ ├────────────────────────────────────┼──────────────┤ +│ date-fns │ MIT │ +├────────────────────────────────────┼──────────────┤ │ dayjs │ MIT │ ├────────────────────────────────────┼──────────────┤ │ delayed-stream │ MIT │ diff --git a/README.md b/README.md index 6ac866c3071..e54f6fac00d 100644 --- a/README.md +++ b/README.md @@ -1,42 +1,58 @@ +- Licensed to the Apache Software Foundation (ASF) under one or more +- contributor license agreements. See the NOTICE file distributed with +- this work for additional information regarding copyright ownership. +- The ASF licenses this file to You under the Apache License, Version 2.0 +- (the "License"); you may not use this file except in compliance with +- the License. You may obtain a copy of the License at +- +- http://www.apache.org/licenses/LICENSE-2.0 +- +- Unless required by applicable law or agreed to in writing, software +- distributed under the License is distributed on an "AS IS" BASIS, +- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- See the License for the specific language governing permissions and +- limitations under the License. +--> + +

+ Kyuubi logo +

+ +

+ + + + + + + + + + + + + + + +

+

+ Project + - + Documentation + - + Who's using +

# Apache Kyuubi -Kyuubi logo - -[![License](https://img.shields.io/badge/license-Apache%202-blue.svg)](https://www.apache.org/licenses/LICENSE-2.0.html) -[![Release](https://img.shields.io/github/v/release/apache/kyuubi?label=release)](https://github.com/apache/kyuubi/releases) -[![](https://tokei.rs/b1/github.com/apache/kyuubi)](https://github.com/apache/kyuubi) -[![codecov](https://codecov.io/gh/apache/kyuubi/branch/master/graph/badge.svg)](https://codecov.io/gh/apache/kyuubi) -![GitHub Workflow Status](https://img.shields.io/github/workflow/status/apache/kyuubi/Kyuubi/master?style=plastic) -[![Travis](https://api.travis-ci.com/apache/kyuubi.svg?branch=master)](https://travis-ci.com/apache/kyuubi) -[![Documentation Status](https://readthedocs.org/projects/kyuubi/badge/?version=latest)](https://kyuubi.apache.org/docs/latest/) -![GitHub top language](https://img.shields.io/github/languages/top/apache/kyuubi) -[![Commit activity](https://img.shields.io/github/commit-activity/m/apache/kyuubi)](https://github.com/apache/kyuubi/graphs/commit-activity) -[![Average time to resolve an issue](http://isitmaintained.com/badge/resolution/apache/kyuubi.svg)](http://isitmaintained.com/project/apache/kyuubi "Average time to resolve an issue") -[![Percentage of issues still open](http://isitmaintained.com/badge/open/apache/kyuubi.svg)](http://isitmaintained.com/project/apache/kyuubi "Percentage of issues still open") +Apache Kyuubi™ is a distributed and multi-tenant gateway to provide serverless +SQL on data warehouses and lakehouses. + ## What is Kyuubi? -Apache Kyuubi™ is a distributed and multi-tenant gateway to provide serverless -SQL on data warehouses and lakehouses. - Kyuubi provides a pure SQL gateway through Thrift JDBC/ODBC interface for end-users to manipulate large-scale data with pre-programmed and extensible Spark SQL engines. This "out-of-the-box" model minimizes the barriers and costs for end-users to use Spark at the client side. At the server-side, Kyuubi server and engines' multi-tenant architecture provides the administrators a way to achieve computing resource isolation, data security, high availability, high client concurrency, etc. ![](./docs/imgs/kyuubi_positioning.png) @@ -45,12 +61,10 @@ Kyuubi provides a pure SQL gateway through Thrift JDBC/ODBC interface for end-us - [x] Multi-tenant Spark Support - [x] Running Spark in a serverless way - ### Target Users Kyuubi's goal is to make it easy and efficient for `anyone` to use Spark(maybe other engines soon) and facilitate users to handle big data like ordinary data. Here, `anyone` means that users do not need to have a Spark technical background but a human language, SQL only. Sometimes, SQL skills are unnecessary when integrating Kyuubi with Apache Superset, which supports rich visualizations and dashboards. - In typical big data production environments with Kyuubi, there should be system administrators and end-users. - System administrators: A small group consists of Spark experts responsible for Kyuubi deployment, configuration, and tuning. @@ -58,7 +72,6 @@ In typical big data production environments with Kyuubi, there should be system Additionally, the Kyuubi community will continuously optimize the whole system with various features, such as History-Based Optimizer, Auto-tuning, Materialized View, SQL Dialects, Functions, e.t.c. - ### Usage scenarios #### Port workloads from HiveServer2 to Spark SQL @@ -71,7 +84,6 @@ HiveServer2 can identify and authenticate a caller, and then if the caller also Kyuubi extends the use of STS in a multi-tenant model based on a unified interface and relies on the concept of multi-tenancy to interact with cluster managers to finally gain the ability of resources sharing/isolation and data security. The loosely coupled architecture of the Kyuubi server and engine dramatically improves the client concurrency and service stability of the service itself. - #### DataLake/LakeHouse Support The vision of Kyuubi is to unify the portal and become an easy-to-use data lake management platform. Different kinds of workloads, such as ETL processing and BI analytics, can be supported by one platform, using one copy of data, with one SQL interface. @@ -80,25 +92,19 @@ The vision of Kyuubi is to unify the portal and become an easy-to-use data lake - Multiple Catalogs support - SQL Standard Authorization support for DataLake(coming) - #### Cloud Native Support Kyuubi can deploy its engines on different kinds of Cluster Managers, such as, Hadoop YARN, Kubernetes, etc. - ![](./docs/imgs/kyuubi_migrating_yarn_to_k8s.png) - ### The Kyuubi Ecosystem(present and future) - The figure below shows our vision for the Kyuubi Ecosystem. Some of them have been realized, some in development, and others would not be possible without your help. ![](./docs/imgs/kyuubi_ecosystem.drawio.png) - - ## Online Documentation Since Kyuubi 1.3.0-incubating, the Kyuubi online documentation is hosted by [https://kyuubi.apache.org/](https://kyuubi.apache.org/). diff --git a/bin/docker-image-tool.sh b/bin/docker-image-tool.sh index 509da0afb24..14d5fe7b09d 100755 --- a/bin/docker-image-tool.sh +++ b/bin/docker-image-tool.sh @@ -27,19 +27,21 @@ function error { if [ -z "${KYUUBI_HOME}" ]; then KYUUBI_HOME="$(cd "`dirname "$0"`"/..; pwd)" fi - -CTX_DIR="$KYUUBI_HOME/target/tmp/docker" +KYUUBI_IMAGE_NAME="kyuubi" function is_dev_build { [ ! -f "$KYUUBI_HOME/RELEASE" ] } -function cleanup_ctx_dir { - if is_dev_build; then - rm -rf "$CTX_DIR" - fi -} -trap cleanup_ctx_dir EXIT +if is_dev_build; then + cat <] [--tgz] [--flink-provided] [--spark-provided] [--hive-provided] |" - echo "| [--mvn ] |" - echo "+------------------------------------------------------------------------------------------------------+" + echo "+----------------------------------------------------------------------------------------------+" + echo "| ./build/dist [--name ] [--tgz] [--web-ui] [--flink-provided] [--hive-provided] |" + echo "| [--spark-provided] [--mvn ] |" + echo "+----------------------------------------------------------------------------------------------+" echo "name: - custom binary name, using project version if undefined" echo "tgz: - whether to make a whole bundled package" + echo "web-ui: - whether to include web ui" echo "flink-provided: - whether to make a package without Flink binary" - echo "spark-provided: - whether to make a package without Spark binary" echo "hive-provided: - whether to make a package without Hive binary" + echo "spark-provided: - whether to make a package without Spark binary" echo "mvn: - external maven executable location" echo "" } @@ -67,6 +69,9 @@ while (( "$#" )); do --tgz) MAKE_TGZ=true ;; + --web-ui) + ENABLE_WEBUI=true + ;; --flink-provided) FLINK_PROVIDED=true ;; @@ -212,6 +217,10 @@ fi MVN_DIST_OPT="-DskipTests" +if [[ "$ENABLE_WEBUI" == "true" ]]; then + MVN_DIST_OPT="$MVN_DIST_OPT -Pweb-ui" +fi + if [[ "$SPARK_PROVIDED" == "true" ]]; then MVN_DIST_OPT="$MVN_DIST_OPT -Pspark-provided" fi @@ -238,14 +247,16 @@ echo -e "\$ ${BUILD_COMMAND[@]}\n" rm -rf "$DISTDIR" mkdir -p "$DISTDIR/pid" mkdir -p "$DISTDIR/logs" -mkdir -p "$DISTDIR/jars" mkdir -p "$DISTDIR/work" +mkdir -p "$DISTDIR/jars" +mkdir -p "$DISTDIR/beeline-jars" +mkdir -p "$DISTDIR/web-ui" mkdir -p "$DISTDIR/externals/engines/flink" mkdir -p "$DISTDIR/externals/engines/spark" mkdir -p "$DISTDIR/externals/engines/trino" mkdir -p "$DISTDIR/externals/engines/hive" mkdir -p "$DISTDIR/externals/engines/jdbc" -mkdir -p "$DISTDIR/beeline-jars" +mkdir -p "$DISTDIR/externals/engines/chat" echo "Kyuubi $VERSION $GITREVSTRING built for" > "$DISTDIR/RELEASE" echo "Java $JAVA_VERSION" >> "$DISTDIR/RELEASE" echo "Scala $SCALA_VERSION" >> "$DISTDIR/RELEASE" @@ -303,6 +314,18 @@ for jar in $(ls "$DISTDIR/jars/"); do fi done +# Copy chat engines +cp "$KYUUBI_HOME/externals/kyuubi-chat-engine/target/kyuubi-chat-engine_${SCALA_VERSION}-${VERSION}.jar" "$DISTDIR/externals/engines/chat/" +cp -r "$KYUUBI_HOME"/externals/kyuubi-chat-engine/target/scala-$SCALA_VERSION/jars/*.jar "$DISTDIR/externals/engines/chat/" + +# Share the jars w/ server to reduce binary size +# shellcheck disable=SC2045 +for jar in $(ls "$DISTDIR/jars/"); do + if [[ -f "$DISTDIR/externals/engines/chat/$jar" ]]; then + (cd $DISTDIR/externals/engines/chat; ln -snf "../../../jars/$jar" "$DISTDIR/externals/engines/chat/$jar") + fi +done + # Copy kyuubi tools if [[ -f "$KYUUBI_HOME/tools/spark-block-cleaner/target/spark-block-cleaner_${SCALA_VERSION}-${VERSION}.jar" ]]; then mkdir -p "$DISTDIR/tools/spark-block-cleaner/kubernetes" @@ -321,6 +344,11 @@ for SPARK_EXTENSION_VERSION in ${SPARK_EXTENSION_VERSIONS[@]}; do fi done +if [[ "$ENABLE_WEBUI" == "true" ]]; then + # Copy web ui dist + cp -r "$KYUUBI_HOME/kyuubi-server/web-ui/dist" "$DISTDIR/web-ui/" +fi + if [[ "$FLINK_PROVIDED" != "true" ]]; then # Copy flink binary dist FLINK_BUILTIN="$(find "$KYUUBI_HOME/externals/kyuubi-download/target" -name 'flink-*' -type d)" diff --git a/build/kyuubi-build-info b/build/kyuubi-build-info index a3da7ccae01..8ac7ee2e20e 100755 --- a/build/kyuubi-build-info +++ b/build/kyuubi-build-info @@ -32,6 +32,7 @@ echo_build_properties() { echo kyuubi_trino_version="$9" echo user="$USER" echo revision=$(git rev-parse HEAD) + echo revision_time=$(git show -s --format=%ci HEAD) echo branch=$(git rev-parse --abbrev-ref HEAD) echo date=$(date -u +%Y-%m-%dT%H:%M:%SZ) echo url=$(git config --get remote.origin.url) diff --git a/build/mvn b/build/mvn index d67638ba274..67aa02b4f79 100755 --- a/build/mvn +++ b/build/mvn @@ -76,7 +76,7 @@ install_mvn() { fi # See simple version normalization: http://stackoverflow.com/questions/16989598/bash-comparing-version-numbers function version { echo "$@" | awk -F. '{ printf("%03d%03d%03d\n", $1,$2,$3); }'; } - if [ $(version $MVN_DETECTED_VERSION) -lt $(version $MVN_VERSION) ]; then + if [ $(version $MVN_DETECTED_VERSION) -ne $(version $MVN_VERSION) ]; then local APACHE_MIRROR=${APACHE_MIRROR:-'https://archive.apache.org/dist/'} install_app \ diff --git a/build/mvnd b/build/mvnd new file mode 100755 index 00000000000..81a6f5c20a5 --- /dev/null +++ b/build/mvnd @@ -0,0 +1,139 @@ +#!/usr/bin/env bash + +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Determine the current working directory +_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +# Preserve the calling directory +_CALLING_DIR="$(pwd)" +# Options used during compilation +_COMPILE_JVM_OPTS="-Xms2g -Xmx2g -XX:ReservedCodeCacheSize=1g -Xss128m" + +if [ "$CI" ]; then + export MAVEN_CLI_OPTS="-Dmvnd.minThreads=4 --no-transfer-progress --errors --fail-fast -Dstyle.color=always" +fi + +# Installs any application tarball given a URL, the expected tarball name, +# and, optionally, a checkable binary path to determine if the binary has +# already been installed +## Arg1 - URL +## Arg2 - Tarball Name +## Arg3 - Checkable Binary +install_app() { + local remote_tarball="$1/$2" + local local_tarball="${_DIR}/$2" + local binary="${_DIR}/$3" + + # setup `curl` and `wget` silent options if we're running on Jenkins + local curl_opts="-L" + local wget_opts="" + curl_opts="--progress-bar ${curl_opts}" + wget_opts="--progress=bar:force ${wget_opts}" + + if [ -z "$3" ] || [ ! -f "$binary" ]; then + # check if we already have the tarball + # check if we have curl installed + # download application + rm -f "$local_tarball" + [ ! -f "${local_tarball}" ] && [ "$(command -v curl)" ] && \ + echo "exec: curl ${curl_opts} ${remote_tarball}" 1>&2 && \ + curl ${curl_opts} "${remote_tarball}" > "${local_tarball}" + # if the file still doesn't exist, lets try `wget` and cross our fingers + [ ! -f "${local_tarball}" ] && [ "$(command -v wget)" ] && \ + echo "exec: wget ${wget_opts} ${remote_tarball}" 1>&2 && \ + wget ${wget_opts} -O "${local_tarball}" "${remote_tarball}" + # if both were unsuccessful, exit + [ ! -f "${local_tarball}" ] && \ + echo -n "ERROR: Cannot download $2 with cURL or wget; " && \ + echo "please install manually and try again." && \ + exit 2 + cd "${_DIR}" && tar -xzf "$2" + rm -rf "$local_tarball" + fi +} + +function get_os_type() { + local unameOsOut=$(uname -s) + local osType + case "${unameOsOut}" in + Linux*) osType=linux ;; + Darwin*) osType=darwin ;; + CYGWIN*) osType=windows ;; + MINGW*) osType=windows ;; + *) osType="UNKNOWN:${unameOsOut}" ;; + esac + echo "$osType" +} + +function get_os_arch() { + local unameArchOut="$(uname -m)" + local arch + case "${unameArchOut}" in + x86_64*) arch=amd64 ;; + arm64*) arch=aarch64 ;; + *) arch="UNKNOWN:${unameOsOut}" ;; + esac + echo "$arch" +} + +# Determine the Mvnd version from the root pom.xml file and +# install mvnd under the build/ folder if needed. +function install_mvnd() { + local MVND_VERSION=$(grep "" "${_DIR}/../pom.xml" | head -n1 | awk -F '[<>]' '{print $3}') + local MVN_VERSION=$(grep "" "${_DIR}/../pom.xml" | head -n1 | awk -F '[<>]' '{print $3}') + MVND_BIN="$(command -v mvnd)" + if [ "$MVND_BIN" ]; then + local MVND_DETECTED_VERSION="$(mvnd -v 2>&1 | grep '(mvnd)' | awk '{print $5}')" + local MVN_DETECTED_VERSION="$(mvnd -v 2>&1 | grep 'Apache Maven' | awk 'NR==2 {print $3}')" + fi + # See simple version normalization: http://stackoverflow.com/questions/16989598/bash-comparing-version-numbers + function version { echo "$@" | awk -F. '{ printf("%03d%03d%03d\n", $1,$2,$3); }'; } + + if [ $(version $MVND_DETECTED_VERSION) -ne $(version $MVND_VERSION) ]; then + local APACHE_MIRROR=${APACHE_MIRROR:-'https://downloads.apache.org'} + local OS_TYPE=$(get_os_type) + local ARCH=$(get_os_arch) + + install_app \ + "${APACHE_MIRROR}/maven/mvnd/${MVND_VERSION}" \ + "maven-mvnd-${MVND_VERSION}-${OS_TYPE}-${ARCH}.tar.gz" \ + "maven-mvnd-${MVND_VERSION}-${OS_TYPE}-${ARCH}/bin/mvnd" + + MVND_BIN="${_DIR}/maven-mvnd-${MVND_VERSION}-${OS_TYPE}-${ARCH}/bin/mvnd" + else + if [ "$(version $MVN_DETECTED_VERSION)" -ne "$(version $MVN_VERSION)" ]; then + echo "Mvnd $MVND_DETECTED_VERSION embedded maven version $MVN_DETECTED_VERSION is not equivalent to $MVN_VERSION required in pom." + exit 1 + fi + fi +} + +install_mvnd + +cd "${_CALLING_DIR}" + +# Set any `mvn` options if not already present +export MAVEN_OPTS=${MAVEN_OPTS:-"$_COMPILE_JVM_OPTS"} + +echo "Using \`mvnd\` from path: $MVND_BIN" 1>&2 + +if [ "$MAVEN_CLI_OPTS" != "" ]; then + echo "MAVEN_CLI_OPTS=$MAVEN_CLI_OPTS" +fi + +${MVND_BIN} $MAVEN_CLI_OPTS "$@" diff --git a/build/release/create-package.sh b/build/release/create-package.sh index c98e7c0f88b..28a89165e80 100755 --- a/build/release/create-package.sh +++ b/build/release/create-package.sh @@ -75,7 +75,7 @@ package_binary() { echo "Creating binary release tarball ${BIN_TGZ_FILE}" - ${KYUUBI_DIR}/build/dist --tgz --spark-provided --flink-provided --hive-provided + ${KYUUBI_DIR}/build/dist --tgz --web-ui --spark-provided --flink-provided --hive-provided cp "${BIN_TGZ_FILE}" "${RELEASE_DIR}" diff --git a/build/release/release.sh b/build/release/release.sh index 4afac386520..fefcce6a913 100755 --- a/build/release/release.sh +++ b/build/release/release.sh @@ -85,7 +85,7 @@ upload_svn_staging() { svn add "${SVN_STAGING_DIR}/${RELEASE_TAG}" - echo "Uploading release tarballs to ${SVN_STAGING_DIR}/${RELEASE_TAG}" + echo "Uploading release tarballs to ${SVN_STAGING_REPO}/${RELEASE_TAG}" ( cd "${SVN_STAGING_DIR}" && \ svn commit --username "${ASF_USERNAME}" --password "${ASF_PASSWORD}" --message "Apache Kyuubi ${RELEASE_TAG}" @@ -94,8 +94,6 @@ upload_svn_staging() { } upload_nexus_staging() { - ${KYUUBI_DIR}/build/mvn clean deploy -DskipTests -Papache-release,flink-provided,spark-provided,hive-provided \ - -s "${KYUUBI_DIR}/build/release/asf-settings.xml" ${KYUUBI_DIR}/build/mvn clean deploy -DskipTests -Papache-release,flink-provided,spark-provided,hive-provided,spark-3.1 \ -s "${KYUUBI_DIR}/build/release/asf-settings.xml" \ -pl extensions/spark/kyuubi-extension-spark-3-1 -am @@ -103,8 +101,7 @@ upload_nexus_staging() { -s "${KYUUBI_DIR}/build/release/asf-settings.xml" \ -pl extensions/spark/kyuubi-extension-spark-3-2 -am ${KYUUBI_DIR}/build/mvn clean deploy -DskipTests -Papache-release,flink-provided,spark-provided,hive-provided,spark-3.3 \ - -s "${KYUUBI_DIR}/build/release/asf-settings.xml" \ - -pl extensions/spark/kyuubi-extension-spark-3-3 -am + -s "${KYUUBI_DIR}/build/release/asf-settings.xml" } finalize_svn() { diff --git a/build/release/script/announce.sh b/build/release/script/announce.sh old mode 100644 new mode 100755 diff --git a/build/release/script/dev_kyuubi_vote.sh b/build/release/script/dev_kyuubi_vote.sh old mode 100644 new mode 100755 diff --git a/charts/kyuubi/Chart.yaml b/charts/kyuubi/Chart.yaml index 6b377ecc5d1..0abec9e5ef3 100644 --- a/charts/kyuubi/Chart.yaml +++ b/charts/kyuubi/Chart.yaml @@ -20,7 +20,7 @@ name: kyuubi description: A Helm chart for Kyuubi server type: application version: 0.1.0 -appVersion: "master-snapshot" +appVersion: 1.7.0 home: https://kyuubi.apache.org icon: https://raw.githubusercontent.com/apache/kyuubi/master/docs/imgs/logo.png sources: diff --git a/charts/kyuubi/README.md b/charts/kyuubi/README.md new file mode 100644 index 00000000000..ef54c322605 --- /dev/null +++ b/charts/kyuubi/README.md @@ -0,0 +1,43 @@ + + +# Helm Chart for Apache Kyuubi + +[Apache Kyuubi](https://airflow.apache.org/) is a distributed and multi-tenant gateway to provide serverless SQL on Data Warehouses and Lakehouses. + + +## Introduction + +This chart will bootstrap an [Kyuubi](https://kyuubi.apache.org) deployment on a [Kubernetes](http://kubernetes.io) +cluster using the [Helm](https://helm.sh) package manager. + +## Requirements + +- Kubernetes cluster +- Helm 3.0+ + + + +## Documentation + +Configuration guide documentation for Kyuubi lives [on the website](https://kyuubi.readthedocs.io/en/master/deployment/settings.html#kyuubi-configurations). (Not just for Helm Chart) + +## Contributing + +Want to help build Apache Kyuubi? Check out our [contributing documentation](https://kyuubi.readthedocs.io/en/master/community/CONTRIBUTING.html). \ No newline at end of file diff --git a/charts/kyuubi/templates/NOTES.txt b/charts/kyuubi/templates/NOTES.txt index 44a35b6b736..2693f5ef6ff 100644 --- a/charts/kyuubi/templates/NOTES.txt +++ b/charts/kyuubi/templates/NOTES.txt @@ -1,21 +1,47 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# +{{/* + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at -Get kyuubi expose URL by running these commands: - export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "kyuubi.fullname" . }}-nodeport) - export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}") - echo $NODE_IP:$NODE_PORT \ No newline at end of file + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/}} + +The chart has been installed! + +In order to check the release status, use: + helm status {{ .Release.Name }} -n {{ .Release.Namespace }} + or for more detailed info + helm get all {{ .Release.Name }} -n {{ .Release.Namespace }} + +************************ +******* Services ******* +************************ +{{- range $name, $frontend := .Values.server }} +{{- if $frontend.enabled }} +{{ $name | snakecase | upper }}: +- To access {{ $.Release.Name }}-{{ $name | kebabcase }} service within the cluster, use the following URL: + {{ $.Release.Name }}-{{ $name | kebabcase }}.{{ $.Release.Namespace }}.svc.cluster.local +{{- if $.Values.kyuubiConf.kyuubiDefaults }} +{{- if regexMatch "(^|\\s)kyuubi.frontend.bind.host\\s*=?\\s*(localhost|127\\.0\\.0\\.1)($|\\s)" $.Values.kyuubiConf.kyuubiDefaults }} +- To access {{ $.Release.Name }}-{{ $name | kebabcase }} service from outside the cluster for debugging, run the following command: + kubectl port-forward svc/{{ $.Release.Name }}-{{ $name | kebabcase }} {{ tpl $frontend.service.port $ }}:{{ tpl $frontend.service.port $ }} -n {{ $.Release.Namespace }} + and use 127.0.0.1:{{ tpl $frontend.service.port $ }} +{{- end }} +{{- end }} +{{- if eq $frontend.service.type "NodePort" }} +- To access {{ $.Release.Name }}-{{ $name | kebabcase }} service from outside the cluster through configured NodePort, run the following commands: + export NODE_PORT=$(kubectl get service {{ $.Release.Name }}-{{ $name | kebabcase }} -n {{ $.Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}") + export NODE_IP=$(kubectl get nodes -n {{ $.Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}") + echo http://$NODE_IP:$NODE_PORT +{{- end }} +{{- end }} +{{- end }} diff --git a/charts/kyuubi/templates/_helpers.tpl b/charts/kyuubi/templates/_helpers.tpl index 684c1f354b1..cd4865a1288 100644 --- a/charts/kyuubi/templates/_helpers.tpl +++ b/charts/kyuubi/templates/_helpers.tpl @@ -16,33 +16,18 @@ */}} {{/* -Expand the name of the chart. +A comma separated string of enabled frontend protocols, e.g. "REST,THRIFT_BINARY". +For details, see 'kyuubi.frontend.protocols': https://kyuubi.readthedocs.io/en/master/deployment/settings.html#frontend */}} -{{- define "kyuubi.name" -}} -{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} -{{- end }} - -{{/* -Create a default fully qualified app name. -We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). -If release name contains chart name it will be used as a full name. -*/}} -{{- define "kyuubi.fullname" -}} -{{- if .Values.fullnameOverride }} -{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} -{{- else }} -{{- $name := default .Chart.Name .Values.nameOverride }} -{{- if contains $name .Release.Name }} -{{- .Release.Name | trunc 63 | trimSuffix "-" }} -{{- else }} -{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- define "kyuubi.frontend.protocols" -}} +{{- $protocols := list }} +{{- range $name, $frontend := .Values.server }} + {{- if $frontend.enabled }} + {{- $protocols = $name | snakecase | upper | append $protocols }} + {{- end }} {{- end }} +{{- if not $protocols }} + {{ fail "At least one frontend protocol must be enabled!" }} {{- end }} +{{- $protocols | join "," }} {{- end }} - -{{/* -Create chart name and version as used by the chart label. -*/}} -{{- define "kyuubi.chart" -}} -{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} -{{- end }} \ No newline at end of file diff --git a/charts/kyuubi/templates/kyuubi-configmap.yaml b/charts/kyuubi/templates/kyuubi-configmap.yaml index ada9e3dc876..4964e651cdb 100644 --- a/charts/kyuubi/templates/kyuubi-configmap.yaml +++ b/charts/kyuubi/templates/kyuubi-configmap.yaml @@ -26,22 +26,26 @@ metadata: app.kubernetes.io/version: {{ .Values.image.tag | default .Chart.AppVersion | quote }} app.kubernetes.io/managed-by: {{ .Release.Service }} data: - {{- with .Values.server.conf.kyuubiEnv }} + {{- with .Values.kyuubiConf.kyuubiEnv }} kyuubi-env.sh: | #!/usr/bin/env bash {{- tpl . $ | nindent 4 }} {{- end }} kyuubi-defaults.conf: | ## Helm chart provided Kyuubi configurations - kyuubi.frontend.bind.host={{ .Values.server.bind.host }} - kyuubi.frontend.bind.port={{ .Values.server.bind.port }} kyuubi.kubernetes.namespace={{ .Release.Namespace }} + kyuubi.frontend.connection.url.use.hostname=false + kyuubi.frontend.thrift.binary.bind.port={{ .Values.server.thriftBinary.port }} + kyuubi.frontend.thrift.http.bind.port={{ .Values.server.thriftHttp.port }} + kyuubi.frontend.rest.bind.port={{ .Values.server.rest.port }} + kyuubi.frontend.mysql.bind.port={{ .Values.server.mysql.port }} + kyuubi.frontend.protocols={{ include "kyuubi.frontend.protocols" . }} ## User provided Kyuubi configurations - {{- with .Values.server.conf.kyuubiDefaults }} + {{- with .Values.kyuubiConf.kyuubiDefaults }} {{- tpl . $ | nindent 4 }} {{- end }} - {{- with .Values.server.conf.log4j2 }} + {{- with .Values.kyuubiConf.log4j2 }} log4j2.xml: | {{- tpl . $ | nindent 4 }} {{- end }} diff --git a/charts/kyuubi/templates/kyuubi-deployment.yaml b/charts/kyuubi/templates/kyuubi-deployment.yaml index 941fdf164c6..43899b6fc51 100644 --- a/charts/kyuubi/templates/kyuubi-deployment.yaml +++ b/charts/kyuubi/templates/kyuubi-deployment.yaml @@ -50,6 +50,12 @@ spec: - name: kyuubi-server image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" imagePullPolicy: {{ .Values.image.pullPolicy }} + {{- with .Values.command }} + command: {{- tpl (toYaml .) $ | nindent 12 }} + {{- end }} + {{- with .Values.args }} + args: {{- tpl (toYaml .) $ | nindent 12 }} + {{- end }} {{- with .Values.env }} env: {{- tpl (toYaml .) $ | nindent 12 }} {{- end }} @@ -57,13 +63,16 @@ spec: envFrom: {{- tpl (toYaml .) $ | nindent 12 }} {{- end }} ports: - - name: frontend-port - containerPort: {{ .Values.server.bind.port }} - protocol: TCP + {{- range $name, $frontend := .Values.server }} + {{- if $frontend.enabled }} + - name: {{ $name | kebabcase }} + containerPort: {{ $frontend.port }} + {{- end }} + {{- end }} {{- if .Values.probe.liveness.enabled }} livenessProbe: - tcpSocket: - port: {{ .Values.server.bind.port }} + exec: + command: ["/bin/bash", "-c", "bin/kyuubi status"] initialDelaySeconds: {{ .Values.probe.liveness.initialDelaySeconds }} periodSeconds: {{ .Values.probe.liveness.periodSeconds }} timeoutSeconds: {{ .Values.probe.liveness.timeoutSeconds }} @@ -72,8 +81,8 @@ spec: {{- end }} {{- if .Values.probe.readiness.enabled }} readinessProbe: - tcpSocket: - port: {{ .Values.server.bind.port }} + exec: + command: ["/bin/bash", "-c", "$KYUUBI_HOME/bin/kyuubi status"] initialDelaySeconds: {{ .Values.probe.readiness.initialDelaySeconds }} periodSeconds: {{ .Values.probe.readiness.periodSeconds }} timeoutSeconds: {{ .Values.probe.readiness.timeoutSeconds }} @@ -85,7 +94,7 @@ spec: {{- end }} volumeMounts: - name: conf - mountPath: {{ .Values.server.confDir }} + mountPath: {{ .Values.kyuubiConfDir }} {{- with .Values.volumeMounts }} {{- tpl (toYaml .) $ | nindent 12 }} {{- end }} diff --git a/charts/kyuubi/templates/kyuubi-service.yaml b/charts/kyuubi/templates/kyuubi-service.yaml index 0152bd23d1f..963f1fcc709 100644 --- a/charts/kyuubi/templates/kyuubi-service.yaml +++ b/charts/kyuubi/templates/kyuubi-service.yaml @@ -15,27 +15,34 @@ # limitations under the License. # +{{- range $name, $frontend := .Values.server }} +{{- if $frontend.enabled }} apiVersion: v1 kind: Service metadata: - name: {{ .Release.Name }} + name: {{ $.Release.Name }}-{{ $name | kebabcase }} labels: - helm.sh/chart: {{ .Chart.Name }}-{{ .Chart.Version }} - app.kubernetes.io/name: {{ .Chart.Name }} - app.kubernetes.io/instance: {{ .Release.Name }} - app.kubernetes.io/version: {{ .Values.image.tag | default .Chart.AppVersion | quote }} - app.kubernetes.io/managed-by: {{ .Release.Service }} - {{- with .Values.service.annotations }} + helm.sh/chart: {{ $.Chart.Name }}-{{ $.Chart.Version }} + app.kubernetes.io/name: {{ $.Chart.Name }} + app.kubernetes.io/instance: {{ $.Release.Name }} + app.kubernetes.io/version: {{ $.Values.image.tag | default $.Chart.AppVersion | quote }} + app.kubernetes.io/managed-by: {{ $.Release.Service }} + {{- with $frontend.service.annotations }} annotations: {{- toYaml . | nindent 4 }} {{- end }} spec: + type: {{ $frontend.service.type }} ports: - - name: http - nodePort: {{ .Values.service.port }} - port: {{ .Values.server.bind.port }} - protocol: TCP - type: {{ .Values.service.type }} + - name: {{ $name | kebabcase }} + port: {{ tpl $frontend.service.port $ }} + targetPort: {{ $frontend.port }} + {{- if and (eq $frontend.service.type "NodePort") ($frontend.service.nodePort) }} + nodePort: {{ $frontend.service.nodePort }} + {{- end }} selector: - app.kubernetes.io/name: {{ .Chart.Name }} - app.kubernetes.io/instance: {{ .Release.Name }} + app.kubernetes.io/name: {{ $.Chart.Name }} + app.kubernetes.io/instance: {{ $.Release.Name }} +--- +{{- end }} +{{- end }} diff --git a/charts/kyuubi/values.yaml b/charts/kyuubi/values.yaml index 22ae9d5a914..7eca7211393 100644 --- a/charts/kyuubi/values.yaml +++ b/charts/kyuubi/values.yaml @@ -24,7 +24,7 @@ replicaCount: 2 image: repository: apache/kyuubi - pullPolicy: Always + pullPolicy: IfNotPresent tag: ~ imagePullSecrets: [] @@ -58,22 +58,83 @@ probe: successThreshold: 1 server: - bind: - host: 0.0.0.0 + # Thrift Binary protocol (HiveServer2 compatible) + thriftBinary: + enabled: true port: 10009 - confDir: /opt/kyuubi/conf - conf: - # The value (templated string) is used for kyuubi-env.sh file - # See https://kyuubi.apache.org/docs/latest/deployment/settings.html#environments for more details - kyuubiEnv: ~ - - # The value (templated string) is used for kyuubi-defaults.conf file - # See https://kyuubi.apache.org/docs/latest/deployment/settings.html#kyuubi-configurations for more details - kyuubiDefaults: ~ - - # The value (templated string) is used for log4j2.xml file - # See https://kyuubi.apache.org/docs/latest/deployment/settings.html#logging for more details - log4j2: ~ + service: + type: ClusterIP + port: "{{ .Values.server.thriftBinary.port }}" + nodePort: ~ + annotations: {} + + # Thrift HTTP protocol (HiveServer2 compatible) + thriftHttp: + enabled: false + port: 10010 + service: + type: ClusterIP + port: "{{ .Values.server.thriftHttp.port }}" + nodePort: ~ + annotations: {} + + # REST API protocol (experimental) + rest: + enabled: true + port: 10099 + service: + type: ClusterIP + port: "{{ .Values.server.rest.port }}" + nodePort: ~ + annotations: {} + + # MySQL compatible text protocol (experimental) + mysql: + enabled: false + port: 3309 + service: + type: ClusterIP + port: "{{ .Values.server.mysql.port }}" + nodePort: ~ + annotations: {} + +kyuubiConfDir: /opt/kyuubi/conf +kyuubiConf: + # The value (templated string) is used for kyuubi-env.sh file + # Example: + # + # kyuubiEnv: | + # export JAVA_HOME=/usr/jdk64/jdk1.8.0_152 + # export SPARK_HOME=/opt/spark + # export FLINK_HOME=/opt/flink + # export HIVE_HOME=/opt/hive + # + # See example at conf/kyuubi-env.sh.template and https://kyuubi.readthedocs.io/en/master/deployment/settings.html#environments for more details + kyuubiEnv: ~ + + # The value (templated string) is used for kyuubi-defaults.conf file + # Example: + # + # kyuubiDefaults: | + # kyuubi.authentication=NONE + # kyuubi.frontend.bind.host=10.0.0.1 + # kyuubi.engine.type=SPARK_SQL + # kyuubi.engine.share.level=USER + # kyuubi.session.engine.initialize.timeout=PT3M + # kyuubi.ha.addresses=zk1:2181,zk2:2181,zk3:2181 + # kyuubi.ha.namespace=kyuubi + # + # See https://kyuubi.readthedocs.io/en/master/deployment/settings.html#kyuubi-configurations for more details + kyuubiDefaults: ~ + + # The value (templated string) is used for log4j2.xml file + # See example at conf/log4j2.xml.template https://kyuubi.readthedocs.io/en/master/deployment/settings.html#logging for more details + log4j2: ~ + +# Command to launch Kyuubi server (templated) +command: ~ +# Arguments to launch Kyuubi server (templated) +args: ~ # Environment variables (templated) env: [] @@ -89,15 +150,6 @@ initContainers: [] # Additional containers for Kyuubi pod (templated) containers: [] -service: - type: NodePort - # The default port limit of kubernetes is 30000-32767 - # to change: - # vim kube-apiserver.yaml (usually under path: /etc/kubernetes/manifests/) - # add or change line 'service-node-port-range=1-32767' under kube-apiserver - port: 30009 - annotations: {} - resources: {} # Used to specify resource, default unlimited. # If you do want to specify resources: diff --git a/conf/kyuubi-defaults.conf.template b/conf/kyuubi-defaults.conf.template index 5a4b9b2a791..c93971d9150 100644 --- a/conf/kyuubi-defaults.conf.template +++ b/conf/kyuubi-defaults.conf.template @@ -18,9 +18,19 @@ ## Kyuubi Configurations # -# kyuubi.authentication NONE -# kyuubi.frontend.bind.host localhost -# kyuubi.frontend.bind.port 10009 +# kyuubi.authentication NONE +# +# kyuubi.frontend.bind.host 10.0.0.1 +# kyuubi.frontend.protocols THRIFT_BINARY,REST +# kyuubi.frontend.thrift.binary.bind.port 10009 +# kyuubi.frontend.rest.bind.port 10099 +# +# kyuubi.engine.type SPARK_SQL +# kyuubi.engine.share.level USER +# kyuubi.session.engine.initialize.timeout PT3M +# +# kyuubi.ha.addresses zk1:2181,zk2:2181,zk3:2181 +# kyuubi.ha.namespace kyuubi # -# Details in https://kyuubi.apache.org/docs/latest/deployment/settings.html +# Details in https://kyuubi.readthedocs.io/en/master/deployment/settings.html diff --git a/dev/dependencyList b/dev/dependencyList index 440f8a44773..ab7697d3516 100644 --- a/dev/dependencyList +++ b/dev/dependencyList @@ -16,11 +16,16 @@ # HikariCP/4.0.3//HikariCP-4.0.3.jar +ST4/4.3.4//ST4-4.3.4.jar animal-sniffer-annotations/1.21//animal-sniffer-annotations-1.21.jar annotations/4.1.1.4//annotations-4.1.1.4.jar +antlr-runtime/3.5.3//antlr-runtime-3.5.3.jar antlr4-runtime/4.9.3//antlr4-runtime-4.9.3.jar aopalliance-repackaged/2.6.1//aopalliance-repackaged-2.6.1.jar -automaton/1.11-8//automaton-1.11-8.jar +arrow-format/11.0.0//arrow-format-11.0.0.jar +arrow-memory-core/11.0.0//arrow-memory-core-11.0.0.jar +arrow-memory-netty/11.0.0//arrow-memory-netty-11.0.0.jar +arrow-vector/11.0.0//arrow-vector-11.0.0.jar classgraph/4.8.138//classgraph-4.8.138.jar commons-codec/1.15//commons-codec-1.15.jar commons-collections/3.2.2//commons-collections-3.2.2.jar @@ -34,8 +39,8 @@ derby/10.14.2.0//derby-10.14.2.0.jar error_prone_annotations/2.14.0//error_prone_annotations-2.14.0.jar failsafe/2.4.4//failsafe-2.4.4.jar failureaccess/1.0.1//failureaccess-1.0.1.jar +flatbuffers-java/1.12.0//flatbuffers-java-1.12.0.jar fliptables/1.0.2//fliptables-1.0.2.jar -generex/1.0.2//generex-1.0.2.jar grpc-api/1.48.0//grpc-api-1.48.0.jar grpc-context/1.48.0//grpc-context-1.48.0.jar grpc-core/1.48.0//grpc-core-1.48.0.jar @@ -44,8 +49,8 @@ grpc-netty/1.48.0//grpc-netty-1.48.0.jar grpc-protobuf-lite/1.48.0//grpc-protobuf-lite-1.48.0.jar grpc-protobuf/1.48.0//grpc-protobuf-1.48.0.jar grpc-stub/1.48.0//grpc-stub-1.48.0.jar -gson/2.8.9//gson-2.8.9.jar -guava/30.1-jre//guava-30.1-jre.jar +gson/2.9.0//gson-2.9.0.jar +guava/31.1-jre//guava-31.1-jre.jar hadoop-client-api/3.3.4//hadoop-client-api-3.3.4.jar hadoop-client-runtime/3.3.4//hadoop-client-runtime-3.3.4.jar hive-common/3.1.3//hive-common-3.1.3.jar @@ -59,19 +64,20 @@ hive-storage-api/2.7.0//hive-storage-api-2.7.0.jar hk2-api/2.6.1//hk2-api-2.6.1.jar hk2-locator/2.6.1//hk2-locator-2.6.1.jar hk2-utils/2.6.1//hk2-utils-2.6.1.jar -httpclient/4.5.13//httpclient-4.5.13.jar -httpcore/4.4.15//httpcore-4.4.15.jar +httpclient/4.5.14//httpclient-4.5.14.jar +httpcore/4.4.16//httpcore-4.4.16.jar +httpmime/4.5.14//httpmime-4.5.14.jar j2objc-annotations/1.3//j2objc-annotations-1.3.jar -jackson-annotations/2.14.1//jackson-annotations-2.14.1.jar -jackson-core/2.14.1//jackson-core-2.14.1.jar -jackson-databind/2.14.1//jackson-databind-2.14.1.jar -jackson-dataformat-yaml/2.14.1//jackson-dataformat-yaml-2.14.1.jar -jackson-datatype-jdk8/2.12.3//jackson-datatype-jdk8-2.12.3.jar -jackson-datatype-jsr310/2.14.1//jackson-datatype-jsr310-2.14.1.jar -jackson-jaxrs-base/2.14.1//jackson-jaxrs-base-2.14.1.jar -jackson-jaxrs-json-provider/2.14.1//jackson-jaxrs-json-provider-2.14.1.jar -jackson-module-jaxb-annotations/2.14.1//jackson-module-jaxb-annotations-2.14.1.jar -jackson-module-scala_2.12/2.14.1//jackson-module-scala_2.12-2.14.1.jar +jackson-annotations/2.14.2//jackson-annotations-2.14.2.jar +jackson-core/2.14.2//jackson-core-2.14.2.jar +jackson-databind/2.14.2//jackson-databind-2.14.2.jar +jackson-dataformat-yaml/2.14.2//jackson-dataformat-yaml-2.14.2.jar +jackson-datatype-jdk8/2.14.2//jackson-datatype-jdk8-2.14.2.jar +jackson-datatype-jsr310/2.14.2//jackson-datatype-jsr310-2.14.2.jar +jackson-jaxrs-base/2.14.2//jackson-jaxrs-base-2.14.2.jar +jackson-jaxrs-json-provider/2.14.2//jackson-jaxrs-json-provider-2.14.2.jar +jackson-module-jaxb-annotations/2.14.2//jackson-module-jaxb-annotations-2.14.2.jar +jackson-module-scala_2.12/2.14.2//jackson-module-scala_2.12-2.14.2.jar jakarta.annotation-api/1.3.5//jakarta.annotation-api-1.3.5.jar jakarta.inject/2.6.1//jakarta.inject-2.6.1.jar jakarta.servlet-api/4.0.4//jakarta.servlet-api-4.0.4.jar @@ -80,13 +86,14 @@ jakarta.ws.rs-api/2.1.6//jakarta.ws.rs-api-2.1.6.jar jakarta.xml.bind-api/2.3.2//jakarta.xml.bind-api-2.3.2.jar javassist/3.25.0-GA//javassist-3.25.0-GA.jar jcl-over-slf4j/1.7.36//jcl-over-slf4j-1.7.36.jar -jersey-client/2.38//jersey-client-2.38.jar -jersey-common/2.38//jersey-common-2.38.jar -jersey-container-servlet-core/2.38//jersey-container-servlet-core-2.38.jar -jersey-entity-filtering/2.38//jersey-entity-filtering-2.38.jar -jersey-hk2/2.38//jersey-hk2-2.38.jar -jersey-media-json-jackson/2.38//jersey-media-json-jackson-2.38.jar -jersey-server/2.38//jersey-server-2.38.jar +jersey-client/2.39//jersey-client-2.39.jar +jersey-common/2.39//jersey-common-2.39.jar +jersey-container-servlet-core/2.39//jersey-container-servlet-core-2.39.jar +jersey-entity-filtering/2.39//jersey-entity-filtering-2.39.jar +jersey-hk2/2.39//jersey-hk2-2.39.jar +jersey-media-json-jackson/2.39//jersey-media-json-jackson-2.39.jar +jersey-media-multipart/2.39//jersey-media-multipart-2.39.jar +jersey-server/2.39//jersey-server-2.39.jar jetcd-api/0.7.3//jetcd-api-0.7.3.jar jetcd-common/0.7.3//jetcd-common-0.7.3.jar jetcd-core/0.7.3//jetcd-core-0.7.3.jar @@ -100,55 +107,59 @@ jetty-util-ajax/9.4.50.v20221201//jetty-util-ajax-9.4.50.v20221201.jar jetty-util/9.4.50.v20221201//jetty-util-9.4.50.v20221201.jar jline/0.9.94//jline-0.9.94.jar jul-to-slf4j/1.7.36//jul-to-slf4j-1.7.36.jar -kubernetes-client/5.12.1//kubernetes-client-5.12.1.jar -kubernetes-model-admissionregistration/5.12.1//kubernetes-model-admissionregistration-5.12.1.jar -kubernetes-model-apiextensions/5.12.1//kubernetes-model-apiextensions-5.12.1.jar -kubernetes-model-apps/5.12.1//kubernetes-model-apps-5.12.1.jar -kubernetes-model-autoscaling/5.12.1//kubernetes-model-autoscaling-5.12.1.jar -kubernetes-model-batch/5.12.1//kubernetes-model-batch-5.12.1.jar -kubernetes-model-certificates/5.12.1//kubernetes-model-certificates-5.12.1.jar -kubernetes-model-common/5.12.1//kubernetes-model-common-5.12.1.jar -kubernetes-model-coordination/5.12.1//kubernetes-model-coordination-5.12.1.jar -kubernetes-model-core/5.12.1//kubernetes-model-core-5.12.1.jar -kubernetes-model-discovery/5.12.1//kubernetes-model-discovery-5.12.1.jar -kubernetes-model-events/5.12.1//kubernetes-model-events-5.12.1.jar -kubernetes-model-extensions/5.12.1//kubernetes-model-extensions-5.12.1.jar -kubernetes-model-flowcontrol/5.12.1//kubernetes-model-flowcontrol-5.12.1.jar -kubernetes-model-metrics/5.12.1//kubernetes-model-metrics-5.12.1.jar -kubernetes-model-networking/5.12.1//kubernetes-model-networking-5.12.1.jar -kubernetes-model-node/5.12.1//kubernetes-model-node-5.12.1.jar -kubernetes-model-policy/5.12.1//kubernetes-model-policy-5.12.1.jar -kubernetes-model-rbac/5.12.1//kubernetes-model-rbac-5.12.1.jar -kubernetes-model-scheduling/5.12.1//kubernetes-model-scheduling-5.12.1.jar -kubernetes-model-storageclass/5.12.1//kubernetes-model-storageclass-5.12.1.jar +kubernetes-client-api/6.4.1//kubernetes-client-api-6.4.1.jar +kubernetes-client/6.4.1//kubernetes-client-6.4.1.jar +kubernetes-httpclient-okhttp/6.4.1//kubernetes-httpclient-okhttp-6.4.1.jar +kubernetes-model-admissionregistration/6.4.1//kubernetes-model-admissionregistration-6.4.1.jar +kubernetes-model-apiextensions/6.4.1//kubernetes-model-apiextensions-6.4.1.jar +kubernetes-model-apps/6.4.1//kubernetes-model-apps-6.4.1.jar +kubernetes-model-autoscaling/6.4.1//kubernetes-model-autoscaling-6.4.1.jar +kubernetes-model-batch/6.4.1//kubernetes-model-batch-6.4.1.jar +kubernetes-model-certificates/6.4.1//kubernetes-model-certificates-6.4.1.jar +kubernetes-model-common/6.4.1//kubernetes-model-common-6.4.1.jar +kubernetes-model-coordination/6.4.1//kubernetes-model-coordination-6.4.1.jar +kubernetes-model-core/6.4.1//kubernetes-model-core-6.4.1.jar +kubernetes-model-discovery/6.4.1//kubernetes-model-discovery-6.4.1.jar +kubernetes-model-events/6.4.1//kubernetes-model-events-6.4.1.jar +kubernetes-model-extensions/6.4.1//kubernetes-model-extensions-6.4.1.jar +kubernetes-model-flowcontrol/6.4.1//kubernetes-model-flowcontrol-6.4.1.jar +kubernetes-model-gatewayapi/6.4.1//kubernetes-model-gatewayapi-6.4.1.jar +kubernetes-model-metrics/6.4.1//kubernetes-model-metrics-6.4.1.jar +kubernetes-model-networking/6.4.1//kubernetes-model-networking-6.4.1.jar +kubernetes-model-node/6.4.1//kubernetes-model-node-6.4.1.jar +kubernetes-model-policy/6.4.1//kubernetes-model-policy-6.4.1.jar +kubernetes-model-rbac/6.4.1//kubernetes-model-rbac-6.4.1.jar +kubernetes-model-scheduling/6.4.1//kubernetes-model-scheduling-6.4.1.jar +kubernetes-model-storageclass/6.4.1//kubernetes-model-storageclass-6.4.1.jar libfb303/0.9.3//libfb303-0.9.3.jar libthrift/0.9.3//libthrift-0.9.3.jar -log4j-1.2-api/2.19.0//log4j-1.2-api-2.19.0.jar -log4j-api/2.19.0//log4j-api-2.19.0.jar -log4j-core/2.19.0//log4j-core-2.19.0.jar -log4j-slf4j-impl/2.19.0//log4j-slf4j-impl-2.19.0.jar +log4j-1.2-api/2.20.0//log4j-1.2-api-2.20.0.jar +log4j-api/2.20.0//log4j-api-2.20.0.jar +log4j-core/2.20.0//log4j-core-2.20.0.jar +log4j-slf4j-impl/2.20.0//log4j-slf4j-impl-2.20.0.jar logging-interceptor/3.12.12//logging-interceptor-3.12.12.jar metrics-core/4.2.8//metrics-core-4.2.8.jar metrics-jmx/4.2.8//metrics-jmx-4.2.8.jar metrics-json/4.2.8//metrics-json-4.2.8.jar metrics-jvm/4.2.8//metrics-jvm-4.2.8.jar -netty-all/4.1.84.Final//netty-all-4.1.84.Final.jar -netty-buffer/4.1.84.Final//netty-buffer-4.1.84.Final.jar -netty-codec-dns/4.1.84.Final//netty-codec-dns-4.1.84.Final.jar -netty-codec-http/4.1.84.Final//netty-codec-http-4.1.84.Final.jar -netty-codec-http2/4.1.84.Final//netty-codec-http2-4.1.84.Final.jar -netty-codec-socks/4.1.84.Final//netty-codec-socks-4.1.84.Final.jar -netty-codec/4.1.84.Final//netty-codec-4.1.84.Final.jar -netty-common/4.1.84.Final//netty-common-4.1.84.Final.jar -netty-handler-proxy/4.1.84.Final//netty-handler-proxy-4.1.84.Final.jar -netty-handler/4.1.84.Final//netty-handler-4.1.84.Final.jar -netty-resolver-dns/4.1.84.Final//netty-resolver-dns-4.1.84.Final.jar -netty-resolver/4.1.84.Final//netty-resolver-4.1.84.Final.jar -netty-transport-classes-epoll/4.1.84.Final//netty-transport-classes-epoll-4.1.84.Final.jar -netty-transport-native-epoll/4.1.84.Final/linux-aarch_64/netty-transport-native-epoll-4.1.84.Final-linux-aarch_64.jar -netty-transport-native-epoll/4.1.84.Final/linux-x86_64/netty-transport-native-epoll-4.1.84.Final-linux-x86_64.jar -netty-transport-native-unix-common/4.1.84.Final//netty-transport-native-unix-common-4.1.84.Final.jar -netty-transport/4.1.84.Final//netty-transport-4.1.84.Final.jar +mimepull/1.9.15//mimepull-1.9.15.jar +netty-all/4.1.89.Final//netty-all-4.1.89.Final.jar +netty-buffer/4.1.89.Final//netty-buffer-4.1.89.Final.jar +netty-codec-dns/4.1.89.Final//netty-codec-dns-4.1.89.Final.jar +netty-codec-http/4.1.89.Final//netty-codec-http-4.1.89.Final.jar +netty-codec-http2/4.1.89.Final//netty-codec-http2-4.1.89.Final.jar +netty-codec-socks/4.1.89.Final//netty-codec-socks-4.1.89.Final.jar +netty-codec/4.1.89.Final//netty-codec-4.1.89.Final.jar +netty-common/4.1.89.Final//netty-common-4.1.89.Final.jar +netty-handler-proxy/4.1.89.Final//netty-handler-proxy-4.1.89.Final.jar +netty-handler/4.1.89.Final//netty-handler-4.1.89.Final.jar +netty-resolver-dns/4.1.89.Final//netty-resolver-dns-4.1.89.Final.jar +netty-resolver/4.1.89.Final//netty-resolver-4.1.89.Final.jar +netty-transport-classes-epoll/4.1.89.Final//netty-transport-classes-epoll-4.1.89.Final.jar +netty-transport-native-epoll/4.1.89.Final/linux-aarch_64/netty-transport-native-epoll-4.1.89.Final-linux-aarch_64.jar +netty-transport-native-epoll/4.1.89.Final/linux-x86_64/netty-transport-native-epoll-4.1.89.Final-linux-x86_64.jar +netty-transport-native-unix-common/4.1.89.Final//netty-transport-native-unix-common-4.1.89.Final.jar +netty-transport/4.1.89.Final//netty-transport-4.1.89.Final.jar okhttp-urlconnection/3.14.9//okhttp-urlconnection-3.14.9.jar okhttp/3.12.12//okhttp-3.12.12.jar okio/1.15.0//okio-1.15.0.jar @@ -169,7 +180,7 @@ simpleclient_tracer_common/0.16.0//simpleclient_tracer_common-0.16.0.jar simpleclient_tracer_otel/0.16.0//simpleclient_tracer_otel-0.16.0.jar simpleclient_tracer_otel_agent/0.16.0//simpleclient_tracer_otel_agent-0.16.0.jar slf4j-api/1.7.36//slf4j-api-1.7.36.jar -snakeyaml/1.31//snakeyaml-1.31.jar +snakeyaml/1.33//snakeyaml-1.33.jar swagger-annotations/2.2.1//swagger-annotations-2.2.1.jar swagger-core/2.2.1//swagger-core-2.2.1.jar swagger-integration/2.2.1//swagger-integration-2.2.1.jar diff --git a/dev/kyuubi-codecov/pom.xml b/dev/kyuubi-codecov/pom.xml index 1d1dcb574b5..ba15ec0f823 100644 --- a/dev/kyuubi-codecov/pom.xml +++ b/dev/kyuubi-codecov/pom.xml @@ -21,7 +21,7 @@ org.apache.kyuubi kyuubi-parent - 1.7.0-SNAPSHOT + 1.8.0-SNAPSHOT ../../pom.xml diff --git a/dev/kyuubi-tpcds/README.md b/dev/kyuubi-tpcds/README.md index adffb6726bd..a9a6487aa12 100644 --- a/dev/kyuubi-tpcds/README.md +++ b/dev/kyuubi-tpcds/README.md @@ -1,21 +1,22 @@ +- Licensed to the Apache Software Foundation (ASF) under one or more +- contributor license agreements. See the NOTICE file distributed with +- this work for additional information regarding copyright ownership. +- The ASF licenses this file to You under the Apache License, Version 2.0 +- (the "License"); you may not use this file except in compliance with +- the License. You may obtain a copy of the License at +- +- http://www.apache.org/licenses/LICENSE-2.0 +- +- Unless required by applicable law or agreed to in writing, software +- distributed under the License is distributed on an "AS IS" BASIS, +- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- See the License for the specific language governing permissions and +- limitations under the License. +--> # Introduction + This module includes TPC-DS data generator and benchmark tool. # How to use @@ -27,12 +28,12 @@ package jar with following command: Support options: -| key | default | description | -|--------------|-----------------|-----------------------------------| -| db | default | the database to write data | -| scaleFactor | 1 | the scale factor of TPC-DS | -| format | parquet | the format of table to store data | -| parallel | scaleFactor * 2 | the parallelism of Spark job | +| key | default | description | +|-------------|-----------------|-----------------------------------| +| db | default | the database to write data | +| scaleFactor | 1 | the scale factor of TPC-DS | +| format | parquet | the format of table to store data | +| parallel | scaleFactor * 2 | the parallelism of Spark job | Example: the following command to generate 10GB data with new database `tpcds_sf10`. @@ -47,7 +48,7 @@ $SPARK_HOME/bin/spark-submit \ Support options: -| key | default | description | +| key | default | description | |-------------|------------------------|---------------------------------------------------------------| | db | none(required) | the TPC-DS database | | benchmark | tpcds-v2.4-benchmark | the name of application | @@ -65,6 +66,7 @@ $SPARK_HOME/bin/spark-submit \ ``` We also support run one of the TPC-DS query: + ```shell $SPARK_HOME/bin/spark-submit \ --class org.apache.kyuubi.tpcds.benchmark.RunBenchmark \ @@ -73,6 +75,7 @@ $SPARK_HOME/bin/spark-submit \ The result of TPC-DS benchmark like: -| name | minTimeMs | maxTimeMs | avgTimeMs | stdDev | stdDevPercent | -|---------|-----------|-------------|------------|----------|----------------| -| q1-v2.4 | 50.522384 | 868.010383 | 323.398267 | 471.6482 | 145.8413108576 | +| name | minTimeMs | maxTimeMs | avgTimeMs | stdDev | stdDevPercent | +|---------|-----------|------------|------------|----------|----------------| +| q1-v2.4 | 50.522384 | 868.010383 | 323.398267 | 471.6482 | 145.8413108576 | + diff --git a/dev/kyuubi-tpcds/pom.xml b/dev/kyuubi-tpcds/pom.xml index 2921cbe8b0f..1bc69f9f2ce 100644 --- a/dev/kyuubi-tpcds/pom.xml +++ b/dev/kyuubi-tpcds/pom.xml @@ -21,7 +21,7 @@ org.apache.kyuubi kyuubi-parent - 1.7.0-SNAPSHOT + 1.8.0-SNAPSHOT ../../pom.xml diff --git a/docker/Dockerfile b/docker/Dockerfile index 588f99b1fb5..0440022de64 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -24,7 +24,7 @@ # -t the target repo and tag name # more options can be found with -h -ARG BASE_IMAGE=openjdk:8-jre-slim +ARG BASE_IMAGE=eclipse-temurin:8-jdk-focal ARG spark_provided="spark_builtin" FROM ${BASE_IMAGE} as builder_spark_provided @@ -34,7 +34,7 @@ ONBUILD ENV SPARK_HOME ${spark_home_in_docker} FROM ${BASE_IMAGE} as builder_spark_builtin ONBUILD ENV SPARK_HOME /opt/spark -ONBUILD RUN mkdir -p ${SPARK_HOME} +ONBUILD RUN mkdir -p ${SPARK_HOME} ONBUILD COPY spark-binary ${SPARK_HOME} FROM builder_${spark_provided} @@ -50,7 +50,8 @@ ENV KYUUBI_WORK_DIR_ROOT ${KYUUBI_HOME}/work RUN set -ex && \ sed -i 's/http:\/\/deb.\(.*\)/https:\/\/deb.\1/g' /etc/apt/sources.list && \ apt-get update && \ - apt install -y bash tini libc6 libpam-modules krb5-user libnss3 procps && \ + apt-get install -y bash tini libc6 libpam-modules krb5-user libnss3 procps && \ + ln -snf /bin/bash /bin/sh && \ useradd -u ${kyuubi_uid} -g root kyuubi -d /home/kyuubi -m && \ mkdir -p ${KYUUBI_HOME} ${KYUUBI_LOG_DIR} ${KYUUBI_PID_DIR} ${KYUUBI_WORK_DIR_ROOT} && \ rm -rf /var/cache/apt/* @@ -59,6 +60,7 @@ COPY LICENSE NOTICE RELEASE ${KYUUBI_HOME}/ COPY bin ${KYUUBI_HOME}/bin COPY jars ${KYUUBI_HOME}/jars COPY beeline-jars ${KYUUBI_HOME}/beeline-jars +COPY web-ui ${KYUUBI_HOME}/web-ui COPY externals/engines/spark ${KYUUBI_HOME}/externals/engines/spark WORKDIR ${KYUUBI_HOME} diff --git a/docker/kyuubi-configmap.yaml b/docker/kyuubi-configmap.yaml index 13835493b8f..9b799359625 100644 --- a/docker/kyuubi-configmap.yaml +++ b/docker/kyuubi-configmap.yaml @@ -52,4 +52,4 @@ data: # kyuubi.frontend.bind.port 10009 # - # Details in https://kyuubi.apache.org/docs/latest/deployment/settings.html + # Details in https://kyuubi.readthedocs.io/en/master/deployment/settings.html diff --git a/docker/playground/.env b/docker/playground/.env index d50e964cf16..ea214551182 100644 --- a/docker/playground/.env +++ b/docker/playground/.env @@ -15,16 +15,16 @@ # limitations under the License. # -AWS_JAVA_SDK_VERSION=1.12.239 -HADOOP_VERSION=3.3.1 +AWS_JAVA_SDK_VERSION=1.12.316 +HADOOP_VERSION=3.3.5 HIVE_VERSION=2.3.9 -ICEBERG_VERSION=1.1.0 -KYUUBI_VERSION=1.6.1-incubating +ICEBERG_VERSION=1.2.0 +KYUUBI_VERSION=1.7.0 KYUUBI_HADOOP_VERSION=3.3.4 POSTGRES_VERSION=12 POSTGRES_JDBC_VERSION=42.3.4 SCALA_BINARY_VERSION=2.12 -SPARK_VERSION=3.3.1 +SPARK_VERSION=3.3.2 SPARK_BINARY_VERSION=3.3 SPARK_HADOOP_VERSION=3.3.2 ZOOKEEPER_VERSION=3.6.3 diff --git a/docker/playground/README.md b/docker/playground/README.md index d9e227c2c7e..66dca2af0ab 100644 --- a/docker/playground/README.md +++ b/docker/playground/README.md @@ -1,5 +1,5 @@ Playground -=== +========== ## For Users @@ -45,3 +45,4 @@ Kyuubi supply some built-in dataset, after Kyuubi started, you can run the follo 1. Build images `docker/playground/build-image.sh`; 2. Optional to use `buildx` to build and publish cross-platform images `BUILDX=1 docker/playground/build-image.sh`; + diff --git a/docker/playground/build-image.sh b/docker/playground/build-image.sh index 84845125732..98b16fd0394 100755 --- a/docker/playground/build-image.sh +++ b/docker/playground/build-image.sh @@ -64,7 +64,6 @@ ${BUILD_CMD} \ --build-arg MAVEN_MIRROR=${MAVEN_MIRROR} \ --build-arg KYUUBI_VERSION=${KYUUBI_VERSION} \ --build-arg AWS_JAVA_SDK_VERSION=${AWS_JAVA_SDK_VERSION} \ - --build-arg CLICKHOUSE_JDBC_VERSION=${CLICKHOUSE_JDBC_VERSION} \ --build-arg SPARK_HADOOP_VERSION=${SPARK_HADOOP_VERSION} \ --build-arg ICEBERG_VERSION=${ICEBERG_VERSION} \ --build-arg POSTGRES_JDBC_VERSION=${POSTGRES_JDBC_VERSION} \ diff --git a/docker/playground/compose.yml b/docker/playground/compose.yml index 069624ee2a9..b0d2b1ea89f 100644 --- a/docker/playground/compose.yml +++ b/docker/playground/compose.yml @@ -17,7 +17,7 @@ services: minio: - image: alekcander/bitnami-minio-multiarch:RELEASE.2022-05-26T05-48-41Z + image: bitnami/minio:2023-debian-11 environment: MINIO_ROOT_USER: minio MINIO_ROOT_PASSWORD: minio_minio @@ -68,6 +68,7 @@ services: ports: - 4040-4050:4040-4050 - 10009:10009 + - 10099:10099 volumes: - ./conf/core-site.xml:/etc/hadoop/conf/core-site.xml - ./conf/hive-site.xml:/etc/hive/conf/hive-site.xml diff --git a/docker/playground/conf/kyuubi-defaults.conf b/docker/playground/conf/kyuubi-defaults.conf index 4906c5de4c0..15b3fbf6e4b 100644 --- a/docker/playground/conf/kyuubi-defaults.conf +++ b/docker/playground/conf/kyuubi-defaults.conf @@ -18,8 +18,10 @@ ## Kyuubi Configurations kyuubi.authentication=NONE -kyuubi.frontend.thrift.binary.bind.host=0.0.0.0 +kyuubi.frontend.bind.host=0.0.0.0 +kyuubi.frontend.protocols=THRIFT_BINARY,REST kyuubi.frontend.thrift.binary.bind.port=10009 +kyuubi.frontend.rest.bind.port=10099 kyuubi.ha.addresses=zookeeper:2181 kyuubi.session.engine.idle.timeout=PT5M kyuubi.operation.incremental.collect=true diff --git a/docs/appendix/terminology.md b/docs/appendix/terminology.md index 77d4deace33..b81fa25fe87 100644 --- a/docs/appendix/terminology.md +++ b/docs/appendix/terminology.md @@ -1,19 +1,19 @@ +- Licensed to the Apache Software Foundation (ASF) under one or more +- contributor license agreements. See the NOTICE file distributed with +- this work for additional information regarding copyright ownership. +- The ASF licenses this file to You under the Apache License, Version 2.0 +- (the "License"); you may not use this file except in compliance with +- the License. You may obtain a copy of the License at +- +- http://www.apache.org/licenses/LICENSE-2.0 +- +- Unless required by applicable law or agreed to in writing, software +- distributed under the License is distributed on an "AS IS" BASIS, +- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- See the License for the specific language governing permissions and +- limitations under the License. +--> # Terminologies @@ -26,7 +26,7 @@ Kyuubi is a unified multi-tenant JDBC interface for large-scale data processing > The Java Database Connectivity (JDBC) API is the industry standard for database-independent connectivity between the Java programming language and a wide range of databases SQL databases and other tabular data sources, > such as spreadsheets or flat files. > The JDBC API provides a call-level API for SQL-based database access. - +> > JDBC technology allows you to use the Java programming language to exploit "Write Once, Run Anywhere" capabilities for applications that require access to enterprise data. > With a JDBC technology-enabled driver, you can connect all corporate data even in a heterogeneous environment. @@ -121,7 +121,7 @@ As an enterprise service, SLA commitment is essential. Deploying Kyuubi in High #### Apache Curator -> Apache Curator is a Java/JVM client library for Apache ZooKeeper, a distributed coordination service. It includes a highlevel API framework and utilities to make using Apache ZooKeeper much easier and more reliable. It also includes recipes for common use cases and extensions such as service discovery and a Java 8 asynchronous DSL. +> Apache Curator is a Java/JVM client library for Apache ZooKeeper, a distributed coordination service. It includes a high-level API framework and utilities to make using Apache ZooKeeper much easier and more reliable. It also includes recipes for common use cases and extensions such as service discovery and a Java 8 asynchronous DSL.

@@ -139,7 +139,7 @@ Kyuubi unifies DataLake & LakeHouse access in the simplest pure SQL way, meanwhi

-http://iceberg.apache.org/ +https://iceberg.apache.org/

@@ -162,3 +162,4 @@ Kyuubi unifies DataLake & LakeHouse access in the simplest pure SQL way, meanwhi https://hudi.apache.org

+ diff --git a/docs/client/advanced/features/plan_only.md b/docs/client/advanced/features/plan_only.md index 9f9431164ab..bcedb2f025f 100644 --- a/docs/client/advanced/features/plan_only.md +++ b/docs/client/advanced/features/plan_only.md @@ -1,19 +1,19 @@ +- Licensed to the Apache Software Foundation (ASF) under one or more +- contributor license agreements. See the NOTICE file distributed with +- this work for additional information regarding copyright ownership. +- The ASF licenses this file to You under the Apache License, Version 2.0 +- (the "License"); you may not use this file except in compliance with +- the License. You may obtain a copy of the License at +- +- http://www.apache.org/licenses/LICENSE-2.0 +- +- Unless required by applicable law or agreed to in writing, software +- distributed under the License is distributed on an "AS IS" BASIS, +- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- See the License for the specific language governing permissions and +- limitations under the License. +--> # Plan Only Execution Mode @@ -23,24 +23,31 @@ Configure the kyuubi.operation.plan.only.mode parameter, the value can be 'parse ## Application-level parameter setting -You can add parameters to the URL when establishing a JDBC connection, the parameter is kyuubi.operation.plan.only.mode=parse/analyze/optimize. +You can add parameters to the URL when establishing a JDBC connection, the parameter is kyuubi.operation.plan.only.mode=parse/analyze/optimize. JDBC URLs have the following format: + ```shell - jdbc:hive2://:/;?kyuubi.operation.plan.only.mode=parse/analyze/optimize/optimize_with_stats/physical/execution/none;#<[spark|hive]Vars> +jdbc:hive2://:/;?kyuubi.operation.plan.only.mode=parse/analyze/optimize/optimize_with_stats/physical/execution/none;#<[spark|hive]Vars> ``` + Refer to [hive_jdbc doc](../../jdbc/hive_jdbc.md) for details of others parameters ### Example: -Using beeline tool to connect to the local service, the Shell command is: +Using beeline tool to connect to the local service, the Shell command is: + ```shell - beeline -u 'jdbc:hive2://0.0.0.0:10009/default?kyuubi.operation.plan.only.mode=parse' -n {user_name} +beeline -u 'jdbc:hive2://0.0.0.0:10009/default?kyuubi.operation.plan.only.mode=parse' -n {user_name} ``` + Running the following SQL: + ```sql SELECT * FROM t1 LEFT JOIN t2 ON t1.id = t2.id ``` + The results are as follows: + ```shell # SQL: 0: jdbc:hive2://0.0.0.0:10009/default> SELECT * FROM t1 LEFT JOIN t2 ON t1.id = t2.id; @@ -62,15 +69,20 @@ The results are as follows: ## Session-level parameter setting You can also set the kyuubi.operation.plan.only.mode parameter by executing the set command after the connection has been established + ```shell - beeline -u 'jdbc:hive2://0.0.0.0:10009/default' -n {user_name} +beeline -u 'jdbc:hive2://0.0.0.0:10009/default' -n {user_name} ``` + Running the following SQL: + ```sql set kyuubi.operation.plan.only.mode=parse; SELECT * FROM t1 LEFT JOIN t2 ON t1.id = t2.id ``` + The results are as follows: + ```shell #set command: 0: jdbc:hive2://0.0.0.0:10009/default> set kyuubi.operation.plan.only.mode=parse; @@ -99,3 +111,4 @@ The results are as follows: 1 row selected (0.404 seconds) 0: jdbc:hive2://0.0.0.0:10009/default> ``` + diff --git a/docs/client/advanced/kerberos.md b/docs/client/advanced/kerberos.md index 6bdcd765979..4962dd2c8b2 100644 --- a/docs/client/advanced/kerberos.md +++ b/docs/client/advanced/kerberos.md @@ -1,24 +1,24 @@ - +- Licensed to the Apache Software Foundation (ASF) under one or more +- contributor license agreements. See the NOTICE file distributed with +- this work for additional information regarding copyright ownership. +- The ASF licenses this file to You under the Apache License, Version 2.0 +- (the "License"); you may not use this file except in compliance with +- the License. You may obtain a copy of the License at +- +- http://www.apache.org/licenses/LICENSE-2.0 +- +- Unless required by applicable law or agreed to in writing, software +- distributed under the License is distributed on an "AS IS" BASIS, +- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- See the License for the specific language governing permissions and +- limitations under the License. +--> # Configure Kerberos for clients to Access Kerberized Kyuubi ## Instructions + When Kyuubi is secured by Kerberos, the authentication procedure becomes a little complicated. ![](../../imgs/kyuubi_kerberos_authentication.png) @@ -35,15 +35,18 @@ The graph above shows a simplified kerberos authentication procedure: In the rest part of this page, we will describe steps needed to pass through this authentication. ## Install Kerberos Client + Usually, Kerberos client is installed as default. You can validate it using klist tool. Linux command and output: + ```bash $ klist -V Kerberos 5 version 1.15.1 ``` MacOS command and output: + ```bash $ klist --version klist (Heimdal 1.5.1apple1) @@ -52,32 +55,35 @@ Send bug-reports to heimdal-bugs@h5l.org ``` Windows command and output: + ```cmd > klist -V Kerberos for Windows ``` If the client is not installed, you should install it ahead based on the OS platform. -We recommend you to install the MIT Kerberos Distribution as all commands in this guide is based on it. +We recommend you to install the MIT Kerberos Distribution as all commands in this guide is based on it. ## Configure Kerberos Client + Kerberos client needs a configuration file for tuning up the creation of Kerberos ticket cache. Following is the configuration file's default location on different OS: -OS | Path ----| --- -Linux | /etc/krb5.conf -MacOS | /etc/krb5.conf -Windows | %ProgramData%\MIT\Kerberos5\krb5.ini +| OS | Path | +|---------|--------------------------------------| +| Linux | /etc/krb5.conf | +| MacOS | /etc/krb5.conf | +| Windows | %ProgramData%\MIT\Kerberos5\krb5.ini | You can use `KRB5_CONFIG` environment variable to overwrite the default location. The configuration file should be configured to point to the same KDC as Kyuubi points to. ## Get Kerberos TGT + Execute `kinit` command to get TGT from KDC. -Suppose user principal is `kyuubi_user@KYUUBI.APACHE.ORG` and user keytab file name is `kyuubi_user.keytab`, +Suppose user principal is `kyuubi_user@KYUUBI.APACHE.ORG` and user keytab file name is `kyuubi_user.keytab`, the command should be: ``` @@ -111,28 +117,29 @@ Valid starting Expires Service principal (Command is identical on different OS platform. Ticket cache location may be different.) ``` -Ticket cache may have different storage type on different OS platform. +Ticket cache may have different storage type on different OS platform. For example, -OS | Default Ticket Cache Type and Location ----| --- -Linux | FILE:/tmp/krb5cc_%{uid} -MacOS | KCM:%{uid}:%{gid} -Windows | API:krb5cc +| OS | Default Ticket Cache Type and Location | +|---------|----------------------------------------| +| Linux | FILE:/tmp/krb5cc_%{uid} | +| MacOS | KCM:%{uid}:%{gid} | +| Windows | API:krb5cc | You can find your ticket cache type and location in the `Ticket cache` part of `klist` output. **Note**: - Ensure your ticket cache type is `FILE` as JVM can only read ticket cache stored as file. -- Do not store TGT into default ticket cache if you are running Kyuubi and execute `kinit` on the same +- Do not store TGT into default ticket cache if you are running Kyuubi and execute `kinit` on the same host with the same OS user. The default ticket cache is already used by Kyuubi server. -Either because the default ticket cache is not a file, or because it is used by Kyuubi server, you +Either because the default ticket cache is not a file, or because it is used by Kyuubi server, you should store ticket cache in another file location. This can be achieved by specifying a file location with `-c` argument in `kinit` command. For example, + ``` $ kinit -c /tmp/krb5cc_beeline -kt kyuubi_user.keytab kyuubi_user@KYUUBI.APACHE.ORG @@ -142,6 +149,7 @@ $ kinit -c /tmp/krb5cc_beeline -kt kyuubi_user.keytab kyuubi_user@KYUUBI.APACHE. To check the ticket cache, specify the file location with `-c` argument in `klist` command. For example, + ``` $ klist -c /tmp/krb5cc_beeline @@ -149,19 +157,21 @@ $ klist -c /tmp/krb5cc_beeline ``` ## Add Kerberos Client Configuration File to JVM Search Path + The JVM, which JDBC client is running on, also needs to read the Kerberos client configuration file. However, JVM uses different default locations from Kerberos client, and does not honour `KRB5_CONFIG` environment variable. -OS | JVM Search Paths ----| --- -Linux | System scope: `/etc/krb5.conf` -MacOS | User scope: `$HOME/Library/Preferences/edu.mit.Kerberos`
System scope: `/etc/krb5.conf` -Windows | User scoep: `%USERPROFILE%\krb5.ini`
System scope: `%windir%\krb5.ini` +| OS | JVM Search Paths | +|---------|---------------------------------------------------------------------------------------------| +| Linux | System scope: `/etc/krb5.conf` | +| MacOS | User scope: `$HOME/Library/Preferences/edu.mit.Kerberos`
System scope: `/etc/krb5.conf` | +| Windows | User scope: `%USERPROFILE%\krb5.ini`
System scope: `%windir%\krb5.ini` | You can use JVM system property, `java.security.krb5.conf`, to overwrite the default location. ## Add Kerberos Ticket Cache to JVM Search Path + JVM determines the ticket cache location in the following order: 1. Path specified by `KRB5CCNAME` environment variable. Path must start with `FILE:`. 2. `/tmp/krb5cc_%{uid}` on Unix-like OS, e.g. Linux, MacOS @@ -171,24 +181,27 @@ JVM determines the ticket cache location in the following order: **Note**: - `${user.home}` and `${user.name}` are JVM system properties. - `${user.home}` should be replaced with `${user.dir}` if `${user.home}` is null. - -Ensure your ticket cache is stored as a file and put it in one of the above locations. + +Ensure your ticket cache is stored as a file and put it in one of the above locations. ## Ensure core-site.xml Exists in Classpath -Like hadoop clients, `hadoop.security.authentication` should be set to `KERBEROS` in `core-site.xml` -to let Hive JDBC driver use Kerberos authentication. `core-site.xml` should be placed under beeline's + +Like hadoop clients, `hadoop.security.authentication` should be set to `KERBEROS` in `core-site.xml` +to let Hive JDBC driver use Kerberos authentication. `core-site.xml` should be placed under beeline's classpath or BI tools' classpath. ### Beeline + Here are the usual locations where `core-site.xml` should exist for different beeline distributions: -Client | Location | Note ---- | --- | --- -Hive beeline | `$HADOOP_HOME/etc/hadoop` | Hive resolves `$HADOOP_HOME` and use `$HADOOP_HOME/bin/hadoop` command to launch beeline. `$HADOOP_HOME/etc/hadoop` is in `hadoop` command's classpath. -Spark beeline | `$HADOOP_CONF_DIR` | In `$SPARK_HOME/conf/spark-env.sh`, `$HADOOP_CONF_DIR` often be set to the directory containing hadoop client configuration files. -Kyuubi beeline | `$HADOOP_CONF_DIR` | In `$KYUUBI_HOME/conf/kyuubi-env.sh`, `$HADOOP_CONF_DIR` often be set to the directory containing hadoop client configuration files. +| Client | Location | Note | +|----------------|---------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------| +| Hive beeline | `$HADOOP_HOME/etc/hadoop` | Hive resolves `$HADOOP_HOME` and use `$HADOOP_HOME/bin/hadoop` command to launch beeline. `$HADOOP_HOME/etc/hadoop` is in `hadoop` command's classpath. | +| Spark beeline | `$HADOOP_CONF_DIR` | In `$SPARK_HOME/conf/spark-env.sh`, `$HADOOP_CONF_DIR` often be set to the directory containing hadoop client configuration files. | +| Kyuubi beeline | `$HADOOP_CONF_DIR` | In `$KYUUBI_HOME/conf/kyuubi-env.sh`, `$HADOOP_CONF_DIR` often be set to the directory containing hadoop client configuration files. | If `core-site.xml` is not found in above locations, create one with the following content: + ```xml @@ -199,6 +212,7 @@ If `core-site.xml` is not found in above locations, create one with the followin ``` ### BI Tools + As to BI tools, ways to add `core-site.xml` varies. Take DBeaver as an example. We can add files to DBeaver's classpath through its `Global libraries` preference. As `Global libraries` only accepts jar files, you should package `core-site.xml` into a jar file. @@ -210,13 +224,16 @@ $ jar -c -f core-site.jar core-site.xml ``` ## Connect with JDBC URL + The last step is to connect to Kyuubi with the right JDBC URL. -The JDBC URL should be in format: +The JDBC URL should be in format: ``` jdbc:hive2://:/;principal= ``` + or + ``` jdbc:hive2://:/;kyuubiServerPrincipal= ``` diff --git a/docs/client/bi_tools/datagrip.md b/docs/client/bi_tools/datagrip.md index 6d22444073c..5dbebf4383e 100644 --- a/docs/client/bi_tools/datagrip.md +++ b/docs/client/bi_tools/datagrip.md @@ -1,43 +1,58 @@ +- Licensed to the Apache Software Foundation (ASF) under one or more +- contributor license agreements. See the NOTICE file distributed with +- this work for additional information regarding copyright ownership. +- The ASF licenses this file to You under the Apache License, Version 2.0 +- (the "License"); you may not use this file except in compliance with +- the License. You may obtain a copy of the License at +- +- http://www.apache.org/licenses/LICENSE-2.0 +- +- Unless required by applicable law or agreed to in writing, software +- distributed under the License is distributed on an "AS IS" BASIS, +- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- See the License for the specific language governing permissions and +- limitations under the License. +--> # DataGrip + ## What is DataGrip + [DataGrip](https://www.jetbrains.com/datagrip/) is a multi-engine database environment released by JetBrains, supporting MySQL and PostgreSQL, Microsoft SQL Server and Oracle, Sybase, DB2, SQLite, HyperSQL, Apache Derby, and H2. ## Preparation + ### Get DataGrip And Install + Please go to [Download DataGrip](https://www.jetbrains.com/datagrip/download) to get and install an appropriate version for yourself. + ### Get Kyuubi Started + [Get kyuubi server started](../../quick_start/quick_start.html) before you try DataGrip with kyuubi. For debugging purpose, you can use `tail -f` or `tailf` to track the server log. + ## Configurations + ### Start DataGrip + After you install DataGrip, just launch it. + ### Select Database + Substantially, this step is to choose a JDBC Driver type to use later. We can choose Apache Hive to set up a driver for Kyuubi. ![select database](../../imgs/datagrip/select_database.png) + ### Datasource Driver -You should first download the missing driver files. Just click on the link below, DataGrip will download and install those. + +You should first download the missing driver files. Just click on the link below, DataGrip will download and install those. ![datasource and driver](../../imgs/datagrip/datasource_and_driver.png) + ### Generic JDBC Connection Settings + After install drivers, you should configure the right host and port which you can find in kyuubi server log. By default, we use `localhost` and `10009` to configure. Of course, you can fill other configs. @@ -45,7 +60,9 @@ Of course, you can fill other configs. After generic configs, you can use test connection to test. ![configuration](../../imgs/datagrip/configuration.png) + ## Interacting With Kyuubi Server + Now, you can interact with Kyuubi server. The left side of the photo is the table, and the right side of the photo is the console. @@ -53,5 +70,7 @@ The left side of the photo is the table, and the right side of the photo is the You can interact through the visual interface or code. ![workspace](../../imgs/datagrip/workspace.png) + ## The End -There are many other amazing features in both Kyuubi and DataGrip and here is just the tip of the iceberg. The rest is for you to discover. \ No newline at end of file + +There are many other amazing features in both Kyuubi and DataGrip and here is just the tip of the iceberg. The rest is for you to discover. diff --git a/docs/client/bi_tools/hue.md b/docs/client/bi_tools/hue.md index 3582006c400..e2b2a97f97f 100644 --- a/docs/client/bi_tools/hue.md +++ b/docs/client/bi_tools/hue.md @@ -1,20 +1,19 @@ - +- Licensed to the Apache Software Foundation (ASF) under one or more +- contributor license agreements. See the NOTICE file distributed with +- this work for additional information regarding copyright ownership. +- The ASF licenses this file to You under the Apache License, Version 2.0 +- (the "License"); you may not use this file except in compliance with +- the License. You may obtain a copy of the License at +- +- http://www.apache.org/licenses/LICENSE-2.0 +- +- Unless required by applicable law or agreed to in writing, software +- distributed under the License is distributed on an "AS IS" BASIS, +- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- See the License for the specific language governing permissions and +- limitations under the License. +--> # Cloudera Hue @@ -43,8 +42,8 @@ Welcome to ## Run Hue in Docker -Here we demo running Kyuubi on macOS and Hue on [Docker for Mac](https://docs.docker.com/docker-for-mac/), -there are several known limitations of network, and you can find +Here we demo running Kyuubi on macOS and Hue on [Docker for Mac](https://docs.docker.com/docker-for-mac/), +there are several known limitations of network, and you can find [workarounds from here](https://docs.docker.com/docker-for-mac/networking/#known-limitations-use-cases-and-workarounds). ### Configuration @@ -98,7 +97,7 @@ Having fun with Hue and Kyuubi! ## For CDH 6.x Users -If you are using CDH 6.x, there is a trick that CDH 6.x blocks Spark in default, you need to modify the configuration to +If you are using CDH 6.x, there is a trick that CDH 6.x blocks Spark in default, you need to modify the configuration to overwrite the `desktop.app_blacklist` to remove this restriction. Config Hue in Cloudera Manager. @@ -106,6 +105,7 @@ Config Hue in Cloudera Manager. ![](../../imgs/hue/cloudera_manager.png) Refer following configuration and tune it to fit your environment. + ``` [desktop] app_blacklist=zookeeper,hbase,impala,search,sqoop,security diff --git a/docs/client/cli/hive_beeline.rst b/docs/client/cli/hive_beeline.rst index fda925aa108..f75e00819f1 100644 --- a/docs/client/cli/hive_beeline.rst +++ b/docs/client/cli/hive_beeline.rst @@ -17,7 +17,7 @@ Hive Beeline ============ Kyuubi supports Apache Hive beeline that works with Kyuubi server. -Hive beeline is a `SQLLine CLI `_ based on the `Hive JDBC Driver <../jdbc/hive_jdbc.html>`_. +Hive beeline is a `SQLLine CLI `_ based on the `Hive JDBC Driver <../jdbc/hive_jdbc.html>`_. Prerequisites ------------- diff --git a/docs/client/cli/index.rst b/docs/client/cli/index.rst index 61be9ad8c0c..19122ced4ab 100644 --- a/docs/client/cli/index.rst +++ b/docs/client/cli/index.rst @@ -21,3 +21,4 @@ Command Line Interface(CLI)s kyuubi_beeline hive_beeline + trino_cli diff --git a/docs/client/cli/trino_cli.md b/docs/client/cli/trino_cli.md new file mode 100644 index 00000000000..68ebd830020 --- /dev/null +++ b/docs/client/cli/trino_cli.md @@ -0,0 +1,88 @@ + + +# Trino command line interface + +The Trino CLI provides a terminal-based, interactive shell for running queries. We can use it to connect Kyuubi server now. + +## Start Kyuubi Trino Server + +First we should configure the trino protocol and the service port in the `kyuubi.conf` + +``` +kyuubi.frontend.protocols TRINO +kyuubi.frontend.trino.bind.port 10999 #default port +``` + +## Install + +Download [trino-cli-363-executable.jar](https://repo1.maven.org/maven2/io/trino/trino-jdbc/363/trino-jdbc-363.jar), rename it to `trino`, make it executable with `chmod +x`, and run it to show the version of the CLI: + +``` +wget https://repo1.maven.org/maven2/io/trino/trino-jdbc/363/trino-jdbc-363.jar +mv trino-jdbc-363.jar trino +chmod +x trino +./trino --version +``` + +## Running the CLI + +The minimal command to start the CLI in interactive mode specifies the URL of the kyuubi server with the Trino protocol: + +``` +./trino --server http://localhost:10999 +``` + +If successful, you will get a prompt to execute commands. Use the help command to see a list of supported commands. Use the clear command to clear the terminal. To stop and exit the CLI, run exit or quit.: + +``` +trino> help + +Supported commands: +QUIT +EXIT +CLEAR +EXPLAIN [ ( option [, ...] ) ] + options: FORMAT { TEXT | GRAPHVIZ | JSON } + TYPE { LOGICAL | DISTRIBUTED | VALIDATE | IO } +DESCRIBE +SHOW COLUMNS FROM
+SHOW FUNCTIONS +SHOW CATALOGS [LIKE ] +SHOW SCHEMAS [FROM ] [LIKE ] +SHOW TABLES [FROM ] [LIKE ] +USE [.] +``` + +You can now run SQL statements. After processing, the CLI will show results and statistics. + +``` +trino> select 1; + _col0 +------- + 1 +(1 row) + +Query 20230216_125233_00806_examine_6hxus, FINISHED, 1 node +Splits: 1 total, 1 done (100.00%) +0.29 [0 rows, 0B] [0 rows/s, 0B/s] + +trino> +``` + +Many other options are available to further configure the CLI in interactive mode to +refer https://trino.io/docs/current/client/cli.html#running-the-cli diff --git a/docs/client/jdbc/hive_jdbc.md b/docs/client/jdbc/hive_jdbc.md index 186ad83b901..42d2f7b5a33 100644 --- a/docs/client/jdbc/hive_jdbc.md +++ b/docs/client/jdbc/hive_jdbc.md @@ -1,36 +1,34 @@ - +- Licensed to the Apache Software Foundation (ASF) under one or more +- contributor license agreements. See the NOTICE file distributed with +- this work for additional information regarding copyright ownership. +- The ASF licenses this file to You under the Apache License, Version 2.0 +- (the "License"); you may not use this file except in compliance with +- the License. You may obtain a copy of the License at +- +- http://www.apache.org/licenses/LICENSE-2.0 +- +- Unless required by applicable law or agreed to in writing, software +- distributed under the License is distributed on an "AS IS" BASIS, +- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- See the License for the specific language governing permissions and +- limitations under the License. +--> # Hive JDBC Driver - ## Instructions Kyuubi does not provide its own JDBC Driver so far, as it is fully compatible with Hive JDBC and ODBC drivers that let you connect to popular Business Intelligence (BI) tools to query, analyze and visualize data though Spark SQL engines. - ## Install Hive JDBC For programing, the easiest way to get `hive-jdbc` is from [the maven central](https://mvnrepository.com/artifact/org.apache.hive/hive-jdbc). For example, - **maven** + ```xml org.apache.hive @@ -40,11 +38,13 @@ For programing, the easiest way to get `hive-jdbc` is from [the maven central](h ``` - **sbt** + ```scala libraryDependencies += "org.apache.hive" % "hive-jdbc" % "2.3.8" ``` - **gradle** + ```gradle implementation group: 'org.apache.hive', name: 'hive-jdbc', version: '2.3.8' ``` @@ -53,7 +53,6 @@ For BI tools, please refer to [Quick Start](../../quick_start/index.html) to che If you find there is no specific document for the BI tool that you are using, don't worry, the configuration part for all BI tools are basically the same. Also, we will appreciate if you can help us to improve the document. - ## JDBC URL JDBC URLs have the following format: @@ -62,14 +61,14 @@ JDBC URLs have the following format: jdbc:hive2://:/;?#<[spark|hive]Vars> ``` -JDBC Parameter | Description ----------------| ----------- -host | The cluster node hosting Kyuubi Server. -port | The port number to which is Kyuubi Server listening. -dbName | Optional database name to set the current database to run the query against, use `default` if absent. -sessionVars | Optional `Semicolon(;)` separated `key=value` parameters for the JDBC/ODBC driver. Such as `user`, `password` and `hive.server2.proxy.user`. -kyuubiConfs | Optional `Semicolon(;)` separated `key=value` parameters for Kyuubi server to create the corresponding engine, dismissed if engine exists. -[spark|hive]Vars | Optional `Semicolon(;)` separated `key=value` parameters for Spark/Hive variables used for variable substitution. +| JDBC Parameter | Description | +|-----------------------|----------------------------------------------------------------------------------------------------------------------------------------------| +| host | The cluster node hosting Kyuubi Server. | +| port | The port number to which is Kyuubi Server listening. | +| dbName | Optional database name to set the current database to run the query against, use `default` if absent. | +| sessionVars | Optional `Semicolon(;)` separated `key=value` parameters for the JDBC/ODBC driver. Such as `user`, `password` and `hive.server2.proxy.user`. | +| kyuubiConfs | Optional `Semicolon(;)` separated `key=value` parameters for Kyuubi server to create the corresponding engine, dismissed if engine exists. | +| [spark|hive]Vars | Optional `Semicolon(;)` separated `key=value` parameters for Spark/Hive variables used for variable substitution. | ## Example @@ -80,3 +79,4 @@ jdbc:hive2://localhost:10009/default;hive.server2.proxy.user=proxy_user?kyuubi.e ## Unsupported Hive Features - Connect to HiveServer2 using HTTP transport. ```transportMode=http``` + diff --git a/docs/client/jdbc/index.rst b/docs/client/jdbc/index.rst index 31871f1382f..abcd6a452f2 100644 --- a/docs/client/jdbc/index.rst +++ b/docs/client/jdbc/index.rst @@ -22,4 +22,5 @@ JDBC Drivers kyuubi_jdbc hive_jdbc mysql_jdbc + trino_jdbc diff --git a/docs/client/jdbc/trino_jdbc.md b/docs/client/jdbc/trino_jdbc.md new file mode 100644 index 00000000000..0f91c4337e6 --- /dev/null +++ b/docs/client/jdbc/trino_jdbc.md @@ -0,0 +1,92 @@ + + +# Trino JDBC Driver + +## Instructions + +Kyuubi currently supports the Trino connection protocol, so we can use Trino-JDBC to connect to the kyuubi server +and submit SQL to Spark, Trino and other engines for execution. + +## Start Kyuubi Trino Server + +First we should configure the trino protocol and the service port in the `kyuubi.conf` + +``` +kyuubi.frontend.protocols TRINO +kyuubi.frontend.trino.bind.port 10999 #default port +``` + +## Install Trino JDBC + +Download [trino-jdbc-363.jar](https://repo1.maven.org/maven2/io/trino/trino-jdbc/363/trino-jdbc-363.jar) and add it to the classpath of your Java application. + +The driver is also available from Maven Central: + +```xml + + io.trino + trino-jdbc + 363 + +``` + +## JDBC URL + +When your driver is loaded, registered and configured, you are ready to connect to Trino from your application. The following JDBC URL formats are supported: + +``` +jdbc:trino://host:port +``` + +Trino JDBC example + +```java +String trinoHost = "localhost"; +String trinoPort = "10999"; +String trinoUser = "default"; +String trinoPassword = null; +Connection connection = null; +ResultSet rs = null; + +try { + // Create the connection using the JDBC URL + connection = DriverManager.getConnection("jdbc:trino://" + trinoHost + ":" + trinoPort, trinoUser, trinoPassword); + + // Do whatever you need to do with the connection + Statement stmt = connection.createStatement(); + rs = stmt.executeQuery("SELECT 1"); + + while (rs.next()) { + // retrieve data from the ResultSet + } + +} catch (Exception e) { + e.printStackTrace(); +} finally { + try { + // Close the connection when you're done with it + if (rs != null) rs.close(); + if (connection != null) connection.close(); + } catch (Exception e) { + e.printStackTrace(); + } +} +``` + +The configuration of the connection parameters can be found in the official trino documentation at: https://trino.io/docs/current/client/jdbc.html#connection-parameters + diff --git a/docs/client/python/pyhive.md b/docs/client/python/pyhive.md index f77e3908b19..dbebf684fc0 100644 --- a/docs/client/python/pyhive.md +++ b/docs/client/python/pyhive.md @@ -1,26 +1,26 @@ - +- Licensed to the Apache Software Foundation (ASF) under one or more +- contributor license agreements. See the NOTICE file distributed with +- this work for additional information regarding copyright ownership. +- The ASF licenses this file to You under the Apache License, Version 2.0 +- (the "License"); you may not use this file except in compliance with +- the License. You may obtain a copy of the License at +- +- http://www.apache.org/licenses/LICENSE-2.0 +- +- Unless required by applicable law or agreed to in writing, software +- distributed under the License is distributed on an "AS IS" BASIS, +- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- See the License for the specific language governing permissions and +- limitations under the License. +--> # PyHive [PyHive](https://github.com/dropbox/PyHive) is a collection of Python DB-API and SQLAlchemy interfaces for Hive. PyHive can connect with the Kyuubi server serving in thrift protocol as HiveServer2. ## Requirements + PyHive works with Python 2.7 / Python 3. Install PyHive via pip for the Hive interface. ``` @@ -28,6 +28,7 @@ pip install 'pyhive[hive]' ``` ## Usage + Use the Kyuubi server's host and thrift protocol port to connect. For further information about usages and features, e.g. DB-API async fetching, using in SQLAlchemy, please refer to [project homepage](https://github.com/dropbox/PyHive). @@ -42,8 +43,8 @@ print(cursor.fetchone()) print(cursor.fetchall()) ``` - ### Use PyHive with Pandas + PyHive provides a handy way to establish a SQLAlchemy compatible connection and works with Pandas dataframe for executing SQL and reading data via [`pandas.read_sql`](https://pandas.pydata.org/docs/reference/api/pandas.read_sql.html). ```python @@ -57,12 +58,13 @@ conn = hive.Connection(host=kyuubi_host,port=10009) dataframe = pd.read_sql("SELECT id, name FROM test.example_table", conn) ``` - ### Authentication + If password is provided for connection, make sure the `auth` param set to either `CUSTOM` or `LDAP`. ```python # open connection conn = hive.Connection(host=kyuubi_host,port=10009, user='user', password='password', auth='CUSTOM') -``` \ No newline at end of file +``` + diff --git a/docs/client/python/pyspark.md b/docs/client/python/pyspark.md index cb459996d0d..b4fcb08e732 100644 --- a/docs/client/python/pyspark.md +++ b/docs/client/python/pyspark.md @@ -1,26 +1,26 @@ - +- Licensed to the Apache Software Foundation (ASF) under one or more +- contributor license agreements. See the NOTICE file distributed with +- this work for additional information regarding copyright ownership. +- The ASF licenses this file to You under the Apache License, Version 2.0 +- (the "License"); you may not use this file except in compliance with +- the License. You may obtain a copy of the License at +- +- http://www.apache.org/licenses/LICENSE-2.0 +- +- Unless required by applicable law or agreed to in writing, software +- distributed under the License is distributed on an "AS IS" BASIS, +- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- See the License for the specific language governing permissions and +- limitations under the License. +--> # PySpark [PySpark](https://spark.apache.org/docs/latest/api/python/index.html) is an interface for Apache Spark in Python. Kyuubi can be used as JDBC source in PySpark. ## Requirements + PySpark works with Python 3.7 and above. Install PySpark with Spark SQL and optional pandas support on Spark using PyPI as follows: @@ -33,20 +33,20 @@ For installation using Conda or manually downloading, please refer to [PySpark i ## Preparation +### Prepare JDBC driver -### Prepare JDBC driver Prepare JDBC driver jar file. Supported Hive compatible JDBC Driver as below: -| Driver | Driver Class Name | Remarks| -| ---------- | ----------------- | ----- | -| Kyuubi Hive Driver ([doc](../jdbc/kyuubi_jdbc.html))| org.apache.kyuubi.jdbc.KyuubiHiveDriver | Compile for the driver on master branch, as [KYUUBI #3484](https://github.com/apache/kyuubi/pull/3485) required by Spark JDBC source not yet included in released version. -| Hive Driver ([doc](../jdbc/hive_jdbc.html))| org.apache.hive.jdbc.HiveDriver | +| Driver | Driver Class Name | Remarks | +|------------------------------------------------------|-----------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Kyuubi Hive Driver ([doc](../jdbc/kyuubi_jdbc.html)) | org.apache.kyuubi.jdbc.KyuubiHiveDriver | Compile for the driver on master branch, as [KYUUBI #3484](https://github.com/apache/kyuubi/pull/3485) required by Spark JDBC source not yet included in released version. | +| Hive Driver ([doc](../jdbc/hive_jdbc.html)) | org.apache.hive.jdbc.HiveDriver | Refer to docs of the driver and prepare the JDBC driver jar file. ### Prepare JDBC Hive Dialect extension -Hive Dialect support is required by Spark for wrapping SQL correctly and sending it to the JDBC driver. Kyuubi provides a JDBC dialect extension with auto-registered Hive Daliect support for Spark. Follow the instructions in [Hive Dialect Support](../../extensions/engines/spark/jdbc-dialect.html) to prepare the plugin jar file `kyuubi-extension-spark-jdbc-dialect_-*.jar`. +Hive Dialect support is required by Spark for wrapping SQL correctly and sending it to the JDBC driver. Kyuubi provides a JDBC dialect extension with auto-registered Hive Dialect support for Spark. Follow the instructions in [Hive Dialect Support](../../extensions/engines/spark/jdbc-dialect.html) to prepare the plugin jar file `kyuubi-extension-spark-jdbc-dialect_-*.jar`. ### Including jars of JDBC driver and Hive Dialect extension @@ -73,8 +73,6 @@ spark = SparkSession.builder \ .getOrCreate() ``` - - ## Usage For further information about PySpark JDBC usage and options, please refer to Spark's [JDBC To Other Databases](https://spark.apache.org/docs/latest/sql-data-sources-jdbc.html). @@ -98,7 +96,6 @@ jdbcDF = spark.read \ From Spark 3.2.0, [`CREATE DATASOURCE TABLE`](https://spark.apache.org/docs/latest/sql-ref-syntax-ddl-create-table-datasource.html) is supported to create jdbc source with SQL. - ```python # create JDBC Datasource table with DDL spark.sql("""CREATE TABLE kyuubi_table USING JDBC @@ -120,13 +117,12 @@ df.writeTo("kyuubi_table").overwrite spark.sql("INSERT INTO kyuubi_table SELECT * FROM some_table") ``` - ### Use PySpark with Pandas + From PySpark 3.2.0, PySpark supports pandas API on Spark which allows you to scale your pandas workload out. Pandas-on-Spark DataFrame and Spark DataFrame are virtually interchangeable. More instructions in [From/to pandas and PySpark DataFrames](https://spark.apache.org/docs/latest/api/python/user_guide/pandas_on_spark/pandas_pyspark.html#pyspark). - ```python import pyspark.pandas as ps @@ -134,3 +130,4 @@ psdf = ps.range(10) sdf = psdf.to_spark().filter("id > 5") sdf.show() ``` + diff --git a/docs/client/rest/rest_api.md b/docs/client/rest/rest_api.md index b1c8edfeacf..fbff59f0500 100644 --- a/docs/client/rest/rest_api.md +++ b/docs/client/rest/rest_api.md @@ -1,19 +1,19 @@ +- Licensed to the Apache Software Foundation (ASF) under one or more +- contributor license agreements. See the NOTICE file distributed with +- this work for additional information regarding copyright ownership. +- The ASF licenses this file to You under the Apache License, Version 2.0 +- (the "License"); you may not use this file except in compliance with +- the License. You may obtain a copy of the License at +- +- http://www.apache.org/licenses/LICENSE-2.0 +- +- Unless required by applicable law or agreed to in writing, software +- distributed under the License is distributed on an "AS IS" BASIS, +- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- See the License for the specific language governing permissions and +- limitations under the License. +--> # REST API v1 @@ -57,10 +57,10 @@ Get an information detail of a session #### Response Body -| Name | Description | Type | -|:----------|:-------------------------------------------|:-------| -| infoType | The type of session information | String | -| infoValue | The value of session information | String | +| Name | Description | Type | +|:----------|:---------------------------------|:-------| +| infoType | The type of session information | String | +| infoValue | The value of session information | String | ### GET /sessions/count @@ -68,9 +68,9 @@ Get the current open session count #### Response Body -| Name | Description | Type | -|:-----------------|:----------------------------------|:-----| -| openSessionCount | The count of opening session | Int | +| Name | Description | Type | +|:-----------------|:-----------------------------|:-----| +| openSessionCount | The count of opening session | Int | ### GET /sessions/execPool/statistic @@ -89,19 +89,16 @@ Create a session #### Request Parameters -| Name | Description | Type | -|:----------------|:-----------------------------------------|:-------| -| protocolVersion | The protocol version of Hive CLI service | Int | -| user | The user name | String | -| password | The user password | String | -| ipAddr | The user client IP address | String | -| configs | The configuration of the session | Map | +| Name | Description | Type | +|:--------|:---------------------------------|:-----| +| configs | The configuration of the session | Map | #### Response Body -| Name | Description | Type | -|:-----------|:------------------------------|:-------| -| identifier | The session handle identifier | String | +| Name | Description | Type | +|:---------------|:---------------------------------------------------------------------------------------------------|:-------| +| identifier | The session handle identifier | String | +| kyuubiInstance | The Kyuubi instance that holds the session and to call for the following operations in the session | String | ### DELETE /sessions/${sessionHandle} @@ -113,11 +110,12 @@ Create an operation with EXECUTE_STATEMENT type #### Request Body -| Name | Description | Type | -|:-------------|:---------------------------------------------------------------|:--------| -| statement | The SQL statement that you execute | String | -| runAsync | The flag indicates whether the query runs synchronously or not | Boolean | -| queryTimeout | The interval of query time out | Long | +| Name | Description | Type | +|:-------------|:---------------------------------------------------------------|:---------------| +| statement | The SQL statement that you execute | String | +| runAsync | The flag indicates whether the query runs synchronously or not | Boolean | +| queryTimeout | The interval of query time out | Long | +| confOverlay | The conf to overlay only for current operation | Map of key=val | #### Response Body @@ -335,7 +333,7 @@ Returns all the batches. #### Request Parameters | Name | Description | Type | -| :--------- |:----------------------------------------------------------------------------------------------------| :----- | +|:-----------|:----------------------------------------------------------------------------------------------------|:-------| | batchType | The batch type, such as spark/flink, if no batchType is specified,
return all types | String | | batchState | The valid batch state can be one of the following:
PENDING, RUNNING, FINISHED, ERROR, CANCELED | String | | batchUser | The user name that created the batch | String | @@ -347,7 +345,7 @@ Returns all the batches. #### Response Body | Name | Description | Type | -| :------ |:-----------------------------------| :--- | +|:--------|:-----------------------------------|:-----| | from | The start index of fetched batches | Int | | total | Number of batches fetched | Int | | batches | [Batch](#batch) List | List | @@ -358,8 +356,11 @@ Create a new batch. #### Request Body +- Media type: `application-json` +- JSON structure: + | Name | Description | Type | -| :-------- |:---------------------------------------------------|:-----------------| +|:----------|:---------------------------------------------------|:-----------------| | batchType | The batch type, such as Spark, Flink | String | | resource | The resource containing the application to execute | Path (required) | | className | Application main class | String(required) | @@ -371,7 +372,33 @@ Create a new batch. The created [Batch](#batch) object. -### GET /batches/{batchId} +### POST /batches (with uploading resource) + +Create a new batch with uploading resource file. + +Example of using `curl` command to send POST request to `/v1/batches` in `multipart-formdata` media type with uploading resource file from local path. + +```shell +curl --location --request POST 'http://localhost:10099/api/v1/batches' \ +--form 'batchRequest="{\"batchType\":\"SPARK\",\"className\":\"org.apache.spark.examples.SparkPi\",\"name\":\"Spark Pi\"}";type=application/json' \ +--form 'resourceFile=@"/local_path/example.jar"' +``` + +#### Request Body + +- Media type: `multipart-formdata` +- Request body structure in multiparts: + +| Name | Description | Media Type | +|:-------------|:--------------------------------------------------------------------------------------------------|:-----------------| +| batchRequest | The batch request in JSON format as request body requried in [POST /batches](#post-batches) | application/json | +| resourceFile | The resource to upload and execute, which will be cached on server and cleaned up after execution | File | + +#### Response Body + +The created [Batch](#batch) object. + +### GET /batches/${batchId} Returns the batch information. @@ -386,13 +413,13 @@ Kill the batch if it is still running. #### Request Parameters | Name | Description | Type | -| :---------------------- | :---------------------------- | :--------------- | +|:------------------------|:------------------------------|:-----------------| | hive.server2.proxy.user | the proxy user to impersonate | String(optional) | #### Response Body | Name | Description | Type | -| :------ |:--------------------------------------| :------ | +|:--------|:--------------------------------------|:--------| | success | Whether killed the batch successfully | Boolean | | msg | The kill batch message | String | @@ -403,14 +430,14 @@ Gets the local log lines from this batch. #### Request Parameters | Name | Description | Type | -| :--- |:--------------------------------------------------| :--- | +|:-----|:--------------------------------------------------|:-----| | from | Offset | Int | | size | Max number of log lines to return, 100 by default | Int | #### Response Body | Name | Description | Type | -| :-------- | :---------------- |:----------------| +|:----------|:------------------|:----------------| | logRowSet | The log lines | List of Strings | | rowCount | The log row count | Int | @@ -431,7 +458,7 @@ Delete the specified engine. #### Request Parameters | Name | Description | Type | -|:------------------------|:------------------------------| :--------------- | +|:------------------------|:------------------------------|:-----------------| | type | the engine type | String(optional) | | sharelevel | the engine share level | String(optional) | | subdomain | the engine subdomain | String(optional) | @@ -444,13 +471,14 @@ Get a list of satisfied engines. #### Request Parameters | Name | Description | Type | -|:------------------------|:------------------------------| :--------------- | +|:------------------------|:------------------------------|:-----------------| | type | the engine type | String(optional) | | sharelevel | the engine share level | String(optional) | | subdomain | the engine subdomain | String(optional) | | hive.server2.proxy.user | the proxy user to impersonate | String(optional) | #### Response Body + The [Engine](#engine) List. ## REST Objects @@ -458,11 +486,12 @@ The [Engine](#engine) List. ### Batch | Name | Description | Type | -| :------------- |:------------------------------------------------------------------| :----- | +|:---------------|:------------------------------------------------------------------|:-------| | id | The batch id | String | | user | The user created the batch | String | | batchType | The batch type | String | | name | The batch name | String | +| appStartTime | The batch application start time | Long | | appId | The batch application Id | String | | appUrl | The batch application tracking url | String | | appState | The batch application state | String | @@ -500,11 +529,11 @@ The [Engine](#engine) List. | statementId | The unique identifier of a single operation | String | | remoteId | The unique identifier of a single operation at engine side | String | | statement | The sql that you execute | String | -| shouldRunAsync | The flag indicating whether the query runs synchronously or not | Boolean | +| shouldRunAsync | The flag indicating whether the query runs synchronously or not | Boolean | | state | The current operation state | String | | eventTime | The time when the event created & logged | Long | -| createTime | The time for changing to the current operation state | Long | -| startTime | The time the query start to time of this operation | Long | +| createTime | The time for changing to the current operation state | Long | +| startTime | The time the query start to time of this operation | Long | | completeTime | Time time the query ends | Long | | exception | Caught exception if have | Throwable | | sessionId | The identifier of the parent session | String | @@ -517,8 +546,8 @@ The [Engine](#engine) List. | columnName | The name of the column | String | | dataType | The type descriptor for this column | String | | columnIndex | The index of this column in the schema | Int | -| precision | The precision of the column | Int | -| scale | The scale of the column | Int | +| precision | The precision of the column | Int | +| scale | The scale of the column | Int | | comment | The comment of the column | String | ### Row @@ -536,12 +565,13 @@ The [Engine](#engine) List. ### Engine -| Name | Description | Type | -| :------------- |:-------------------------------------------------------------------| :----- | -| version | The version of the Kyuubi server that creates this engine instance | String | -| user | The user created the engine | String | -| engineType | The engine type | String | -| sharelevel | The engine share level | String | -| subdomain | The engine subdomain | String | -| instance | host:port for the engine node | String | -| namespace | The namespace used to expose the engine to KyuubiServers | String | +| Name | Description | Type | +|:-----------|:-------------------------------------------------------------------|:-------| +| version | The version of the Kyuubi server that creates this engine instance | String | +| user | The user created the engine | String | +| engineType | The engine type | String | +| sharelevel | The engine share level | String | +| subdomain | The engine subdomain | String | +| instance | host:port for the engine node | String | +| namespace | The namespace used to expose the engine to KyuubiServers | String | + diff --git a/docs/community/collaborators.md b/docs/community/collaborators.md index c9ff9e2e816..264e8b3c92f 100644 --- a/docs/community/collaborators.md +++ b/docs/community/collaborators.md @@ -1,19 +1,19 @@ +- Licensed to the Apache Software Foundation (ASF) under one or more +- contributor license agreements. See the NOTICE file distributed with +- this work for additional information regarding copyright ownership. +- The ASF licenses this file to You under the Apache License, Version 2.0 +- (the "License"); you may not use this file except in compliance with +- the License. You may obtain a copy of the License at +- +- http://www.apache.org/licenses/LICENSE-2.0 +- +- Unless required by applicable law or agreed to in writing, software +- distributed under the License is distributed on an "AS IS" BASIS, +- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- See the License for the specific language governing permissions and +- limitations under the License. +--> # Collaborators diff --git a/docs/community/release.md b/docs/community/release.md index f3c15983511..8252669c0dc 100644 --- a/docs/community/release.md +++ b/docs/community/release.md @@ -1,24 +1,25 @@ +- Licensed to the Apache Software Foundation (ASF) under one or more +- contributor license agreements. See the NOTICE file distributed with +- this work for additional information regarding copyright ownership. +- The ASF licenses this file to You under the Apache License, Version 2.0 +- (the "License"); you may not use this file except in compliance with +- the License. You may obtain a copy of the License at +- +- http://www.apache.org/licenses/LICENSE-2.0 +- +- Unless required by applicable law or agreed to in writing, software +- distributed under the License is distributed on an "AS IS" BASIS, +- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- See the License for the specific language governing permissions and +- limitations under the License. +--> Kyuubi Release Guide -=== +==================== ## Introduction + The Apache Kyuubi project periodically declares and publishes releases. A release is one or more packages of the project artifact(s) that are approved for general public distribution and use. They may come with various degrees of caveat regarding their perceived quality and potential for change, such as "alpha", "beta", "incubating", @@ -32,8 +33,8 @@ Each release is executed by a Release Manager, who is selected among the Kyuubi the process that the Release Manager follows to perform a release. Any changes to this process should be discussed and adopted on the [dev mailing list](mailto:dev@kyuubi.apache.org). -Please remember that publishing software has legal consequences. This guide complements the foundation-wide -[Product Release Policy](https://www.apache.org/dev/release.html) and +Please remember that publishing software has legal consequences. This guide complements the foundation-wide +[Product Release Policy](https://www.apache.org/dev/release.html) and [Release Distribution Policy](https://www.apache.org/dev/release-distribution). ### Overview @@ -42,12 +43,14 @@ The release process consists of several steps: 1. Decide to release 2. Prepare for the release -3. Cut branch off for __major__ release +3. Cut branch off for __feature__ release 4. Build a release candidate 5. Vote on the release candidate 6. If necessary, fix any issues and go back to step 3. 7. Finalize the release 8. Promote the release +9. Remove the dist repo directories for deprecated release candidates +10. Publish docker image ## Decide to release @@ -86,6 +89,7 @@ export ASF_PASSWORD= ``` #### Java Home + An available environment variable `JAVA_HOME`, you can do `echo $JAVA_HOME` to check it. Note that, the Java version should be 8. @@ -101,11 +105,13 @@ You need to have a GPG key to sign the release artifacts. Please be aware of the with your Apache account, please create one according to the guidelines. Determine your Apache GPG Key and Key ID, as follows: + ```shell gpg --list-keys --keyid-format SHORT ``` This will list your GPG keys. One of these should reflect your Apache account, for example: + ```shell pub rsa4096 2021-08-30 [SC] 8FC8075E1FDC303276C676EE8001952629BCC75D @@ -118,16 +124,18 @@ sub rsa4096 2021-08-30 [E] Here, the key ID is the 8-digit hex string in the pub line: `29BCC75D`. To export the PGP public key, using: + ```shell gpg --armor --export 29BCC75D ``` If you have more than one gpg key, you can specify the default key as the following: + ``` echo 'default-key ' > ~/.gnupg/gpg.conf ``` -The last step is to update the KEYS file with your code signing key +The last step is to update the KEYS file with your code signing key https://www.apache.org/dev/openpgp.html#export-public-key ```shell @@ -145,12 +153,12 @@ gpg --keyserver hkp://keyserver.ubuntu.com --send-keys ${PUBLIC_KEY} # send publ gpg --keyserver hkp://keyserver.ubuntu.com --recv-keys ${PUBLIC_KEY} # verify ``` -## Cut branch if for major release +## Cut branch if for feature release Kyuubi use version pattern `{MAJOR_VERSION}.{MINOR_VERSION}.{PATCH_VERSION}[-{OPTIONAL_SUFFIX}]`, e.g. `1.7.0`. -__Major Release__ means `MAJOR_VERSION` or `MINOR_VERSION` changed, and __Patch Release__ means `PATCH_VERSION` changed. +__Feature Release__ means `MAJOR_VERSION` or `MINOR_VERSION` changed, and __Patch Release__ means `PATCH_VERSION` changed. -The main step towards preparing a major release is to create a release branch. This is done via standard Git branching +The main step towards preparing a feature release is to create a release branch. This is done via standard Git branching mechanism and should be announced to the community once the branch is created. > Note: If you are releasing a patch version, you can ignore this step. @@ -161,31 +169,49 @@ After cutting release branch, don't forget bump version in `master` branch. ## Build a release candidate -> Don't forget to switch to the release branch! +> Don't forget to switch to the release branch! -1. Set environment variables. +- Set environment variables. ```shell export RELEASE_VERSION= export RELEASE_RC_NO= +export NEXT_VERSION= ``` -2. Bump version. +- Bump version, and create a git tag for the release candidate. + +Considering that other committers may merge PRs during your release period, you should accomplish the version change +first, and then come back to the release candidate tag to continue the rest release process. + +The tag pattern is `v${RELEASE_VERSION}-rc${RELEASE_RC_NO}`, e.g. `v1.7.0-rc0` + +> NOTE: After all the voting passed, be sure to create a final tag with the pattern: `v${RELEASE_VERSION}` ```shell +# Bump to the release version build/mvn versions:set -DgenerateBackupPoms=false -DnewVersion="${RELEASE_VERSION}" - git commit -am "[RELEASE] Bump ${RELEASE_VERSION}" -``` -3. Create a git tag for the release candidate. +# Create tag +git tag v${RELEASE_VERSION}-rc${RELEASE_RC_NO} -The tag pattern is `v${RELEASE_VERSION}-rc${RELEASE_RC_NO}`, e.g. `v1.7.0-rc0` +# Prepare for the next development version +build/mvn versions:set -DgenerateBackupPoms=false -DnewVersion="${NEXT_VERSION}-SNAPSHOT" +git commit -am "[RELEASE] Bump ${NEXT_VERSION}-SNAPSHOT" -> NOTE: After all the voting passed, be sure to create a final tag with the pattern: `v${RELEASE_VERSION}` +# Push branch to apache remote repo +git push apache + +# Push tag to apache remote repo +git push apache v${RELEASE_VERSION}-rc${RELEASE_RC_NO} -4. Package the release binaries & sources, and upload them to the Apache staging SVN repo. Publish jars to the Apache -staging Maven repo. +# Go back to release candidate tag +git checkout v${RELEASE_VERSION}-rc${RELEASE_RC_NO} +``` + +- Package source and binary artifacts, and upload them to the Apache staging SVN repo. Publish jars to the Apache + staging Maven repo. ```shell build/release/release.sh publish @@ -193,7 +219,7 @@ build/release/release.sh publish To make your release available in the staging repository, you must close the staging repo in the [Apache Nexus](https://repository.apache.org/#stagingRepositories). Until you close, you can re-run deploying to staging multiple times. But once closed, it will create a new staging repo. So ensure you close this, so that the next RC (if need be) is on a new repo. Once everything is good, close the staging repository on Apache Nexus. -5. Generate a pre-release note from GitHub for the subsequent voting. +- Generate a pre-release note from GitHub for the subsequent voting. Goto the [release page](https://github.com/apache/kyuubi/releases) and click the "Draft a new release" button, then it would jump to a new page to prepare the release. @@ -209,7 +235,7 @@ The release voting takes place on the Apache Kyuubi developers list. - Recommend represent voting closing time in UTC format. - Make sure the email is in text format and the links are correct. -> Note: you can generate the voting mail content for dev ML automatically via invoke the `build/release/script/dev_kyuubi_vote.sh` script. +> Note: you can generate the voting mail content for dev ML automatically via invoke the `build/release/script/dev_kyuubi_vote.sh` script. Once the vote is done, you should also send out a summary email with the totals, with a subject that looks something like __[VOTE][RESULT] Release Apache Kyuubi ...__ @@ -225,7 +251,7 @@ After the vote passes, to upload the binaries to Apache mirrors, you move the bi be where they are voted) to release directory. This "moving" is the only way you can add stuff to the actual release directory. (Note: only PMC members can move to release directory) -Move the sub-directory in "dev" to the corresponding directory in "release". If you've added your signing key to the +Move the subdirectory in "dev" to the corresponding directory in "release". If you've added your signing key to the KEYS file, also update the release copy. ```shell @@ -237,7 +263,7 @@ This will be mirrored throughout the Apache network. For Maven Central Repository, you can Release from the [Apache Nexus Repository Manager](https://repository.apache.org/). Log in, open "Staging Repositories", find the one voted on, select and click "Release" and confirm. If successful, it -should show up under https://repository.apache.org/content/repositories/releases/org/apache/kyuubi/ and the same under +should show up under https://repository.apache.org/content/repositories/releases/org/apache/kyuubi/ and the same under https://repository.apache.org/content/groups/maven-staging-group/org/apache/kyuubi/ (look for the correct release version). After some time this will be synced to [Maven Central](https://search.maven.org/) automatically. @@ -249,8 +275,7 @@ Fork and clone [Apache Kyuubi website](https://github.com/apache/kyuubi-website) 1. Add a new markdown file in `src/zh/news/`, `src/en/news/` 2. Add a new markdown file in `src/zh/release/`, `src/en/release/` -3. Follow [Build Document](../develop_tools/build_document.md) to build documents, then copy `apache/kyuubi`'s - folder `docs/_build/html` to `apache/kyuubi-website`'s folder `content/docs/r{RELEASE_VERSION}` +3. Update `releases` defined in `hugo.toml`'s `[params]` part. ### Create an Announcement @@ -262,10 +287,9 @@ Note that, you must use the apache.org email to send announce to `announce@apach Enjoy an adult beverage of your choice, and congratulations on making a Kyuubi release. - ## Remove the dist repo directories for deprecated release candidates -Remove the deprecated dist repo directories at last. +Remove the deprecated dist repo directories at last. ```shell cd work/svn-dev @@ -274,3 +298,7 @@ svn delete https://dist.apache.org/repos/dist/dev/kyuubi/{RELEASE_TAG} \ --password "${ASF_PASSWORD}" \ --message "Remove deprecated Apache Kyuubi ${RELEASE_TAG}" ``` + +## Publish docker image + +See steps in `https://github.com/apache/kyuubi-docker/blob/master/release/release_guide.md` diff --git a/docs/conf.py b/docs/conf.py index 3df98c6e34c..dcf038314c5 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -64,7 +64,7 @@ author = 'Apache Kyuubi Community' # The full version, including alpha/beta/rc tags -release = subprocess.getoutput("cd .. && build/mvn help:evaluate -Dexpression=project.version|grep -v Using|grep -v INFO|grep -v WARNING|tail -n 1").split('\n')[-1] +release = subprocess.getoutput("grep 'kyuubi-parent' -C1 ../pom.xml | grep '' | awk -F '[<>]' '{print $3}'") # -- General configuration --------------------------------------------------- diff --git a/docs/connector/flink/index.rst b/docs/connector/flink/index.rst index c9d91091f71..e7d40fd43b9 100644 --- a/docs/connector/flink/index.rst +++ b/docs/connector/flink/index.rst @@ -19,6 +19,6 @@ Connectors For Flink SQL Query Engine .. toctree:: :maxdepth: 2 - flink_table_store + paimon hudi iceberg diff --git a/docs/connector/flink/flink_table_store.rst b/docs/connector/flink/paimon.rst similarity index 50% rename from docs/connector/flink/flink_table_store.rst rename to docs/connector/flink/paimon.rst index c2fd6679d3d..b67101488e8 100644 --- a/docs/connector/flink/flink_table_store.rst +++ b/docs/connector/flink/paimon.rst @@ -13,57 +13,56 @@ See the License for the specific language governing permissions and limitations under the License. -`Flink Table Store`_ -========== +`Apache Paimon (Incubating)`_ +============================= -Flink Table Store is a unified storage to build dynamic tables for both streaming and batch processing in Flink, -supporting high-speed data ingestion and timely data query. +Apache Paimon (Incubating) is a streaming data lake platform that supports high-speed data ingestion, change data tracking, and efficient real-time analytics. .. tip:: - This article assumes that you have mastered the basic knowledge and operation of `Flink Table Store`_. - For the knowledge about Flink Table Store not mentioned in this article, + This article assumes that you have mastered the basic knowledge and operation of `Apache Paimon (Incubating)`_. + For the knowledge not mentioned in this article, you can obtain it from its `Official Documentation`_. -By using kyuubi, we can run SQL queries towards Flink Table Store which is more -convenient, easy to understand, and easy to expand than directly using -flink to manipulate Flink Table Store. +By using kyuubi, we can run SQL queries towards Apache Paimon (Incubating) which is more +convenient, easy to understand, and easy to expand than directly using flink. -Flink Table Store Integration -------------------- +Apache Paimon (Incubating) Integration +-------------------------------------- -To enable the integration of kyuubi flink sql engine and Flink Table Store, you need to: +To enable the integration of kyuubi flink sql engine and Apache Paimon (Incubating), you need to: -- Referencing the Flink Table Store :ref:`dependencies` +- Referencing the Apache Paimon (Incubating) :ref:`dependencies` -.. _flink-table-store-deps: +.. _flink-paimon-deps: Dependencies ************ -The **classpath** of kyuubi flink sql engine with Flink Table Store supported consists of +The **classpath** of kyuubi flink sql engine with Apache Paimon (Incubating) supported consists of 1. kyuubi-flink-sql-engine-\ |release|\ _2.12.jar, the engine jar deployed with Kyuubi distributions 2. a copy of flink distribution -3. flink-table-store-dist-.jar (example: flink-table-store-dist-0.2.jar), which can be found in the `Maven Central`_ +3. paimon-flink-.jar (example: paimon-flink-1.16-0.4-SNAPSHOT.jar), which can be found in the `Apache Paimon (Incubating) Supported Engines Flink`_ +4. flink-shaded-hadoop-2-uber-.jar, which code can be found in the `Pre-bundled Hadoop Jar`_ -In order to make the Flink Table Store packages visible for the runtime classpath of engines, we can use these methods: +In order to make the Apache Paimon (Incubating) packages visible for the runtime classpath of engines, you need to: -1. Put the Flink Table Store packages into ``$FLINK_HOME/lib`` directly +1. Put the Apache Paimon (Incubating) packages into ``$FLINK_HOME/lib`` directly 2. Setting the HADOOP_CLASSPATH environment variable or copy the `Pre-bundled Hadoop Jar`_ to flink/lib. .. warning:: - Please mind the compatibility of different Flink Table Store and Flink versions, which can be confirmed on the page of `Flink Table Store multi engine support`_. + Please mind the compatibility of different Apache Paimon (Incubating) and Flink versions, which can be confirmed on the page of `Apache Paimon (Incubating) multi engine support`_. -Flink Table Store Operations ------------------- +Apache Paimon (Incubating) Operations +------------------------------------- Taking ``CREATE CATALOG`` as a example, .. code-block:: sql CREATE CATALOG my_catalog WITH ( - 'type'='table-store', - 'warehouse'='hdfs://nn:8020/warehouse/path' -- or 'file:///tmp/foo/bar' + 'type'='paimon', + 'warehouse'='file:/tmp/paimon' ); USE CATALOG my_catalog; @@ -96,7 +95,7 @@ Taking ``Streaming Query`` as a example, SET 'execution.runtime-mode' = 'streaming'; SELECT * FROM MyTable /*+ OPTIONS ('log.scan'='latest') */; -Taking ``Rescale Bucket` as a example, +Taking ``Rescale Bucket`` as a example, .. code-block:: sql @@ -104,8 +103,8 @@ Taking ``Rescale Bucket` as a example, INSERT OVERWRITE my_table PARTITION (dt = '2022-01-01'); -.. _Flink Table Store: https://nightlies.apache.org/flink/flink-table-store-docs-stable/ -.. _Official Documentation: https://nightlies.apache.org/flink/flink-table-store-docs-stable/ -.. _Maven Central: https://mvnrepository.com/artifact/org.apache.flink/flink-table-store-dist -.. _Pre-bundled Hadoop Jar: https://flink.apache.org/downloads.html -.. _Flink Table Store multi engine support: https://nightlies.apache.org/flink/flink-table-store-docs-stable/docs/engines/overview/ +.. _Apache Paimon (Incubating): https://paimon.apache.org/ +.. _Official Documentation: https://paimon.apache.org/docs/master/ +.. _Apache Paimon (Incubating) Supported Engines Flink: https://paimon.apache.org/docs/master/engines/flink/#preparing-paimon-jar-file +.. _Pre-bundled Hadoop Jar: https://flink.apache.org/downloads/#additional-components +.. _Apache Paimon (Incubating) multi engine support: https://paimon.apache.org/docs/master/engines/overview/ diff --git a/docs/connector/hive/index.rst b/docs/connector/hive/index.rst index 2b2b863a67e..d96f8b04188 100644 --- a/docs/connector/hive/index.rst +++ b/docs/connector/hive/index.rst @@ -19,4 +19,5 @@ Connectors for Hive SQL Query Engine .. toctree:: :maxdepth: 2 + paimon iceberg diff --git a/docs/connector/hive/paimon.rst b/docs/connector/hive/paimon.rst new file mode 100644 index 00000000000..000d2d7e83c --- /dev/null +++ b/docs/connector/hive/paimon.rst @@ -0,0 +1,100 @@ +.. Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +`Apache Paimon (Incubating)`_ +========== + +Apache Paimon(incubating) is a streaming data lake platform that supports high-speed data ingestion, change data tracking and efficient real-time analytics. + +.. tip:: + This article assumes that you have mastered the basic knowledge and operation of `Apache Paimon (Incubating)`_. + For the knowledge about Apache Paimon (Incubating) not mentioned in this article, + you can obtain it from its `Official Documentation`_. + +By using Kyuubi, we can run SQL queries towards Apache Paimon (Incubating) which is more +convenient, easy to understand, and easy to expand than directly using +Hive to manipulate Apache Paimon (Incubating). + +Apache Paimon (Incubating) Integration +------------------- + +To enable the integration of kyuubi hive sql engine and Apache Paimon (Incubating), you need to: + +- Referencing the Apache Paimon (Incubating) :ref:`dependencies` +- Setting the environment variable :ref:`configurations` + +.. _hive-paimon-deps: + +Dependencies +************ + +The **classpath** of kyuubi hive sql engine with Iceberg supported consists of + +1. kyuubi-hive-sql-engine-\ |release|\ _2.12.jar, the engine jar deployed with Kyuubi distributions +2. a copy of hive distribution +3. paimon-hive-connector--.jar (example: paimon-hive-connector-3.1-0.4-SNAPSHOT.jar), which can be found in the `Apache Paimon (Incubating) Supported Engines Hive`_ + +In order to make the Hive packages visible for the runtime classpath of engines, we can use one of these methods: + +1. You can create an auxlib folder under the root directory of Hive, and copy paimon-hive-connector-3.1-.jar into auxlib. +2. Execute ADD JAR statement in the Kyuubi to add dependencies to Hive’s auxiliary classpath. For example: + +.. code-block:: sql + + ADD JAR /path/to/paimon-hive-connector-3.1-.jar; + +.. warning:: + The second method is not recommended. If you’re using the MR execution engine and running a join statement, you may be faced with the exception + ``org.apache.hive.com.esotericsoftware.kryo.kryoexception: unable to find class.`` + +.. warning:: + Please mind the compatibility of different Apache Paimon (Incubating) and Hive versions, which can be confirmed on the page of `Apache Paimon (Incubating) multi engine support`_. + +.. _hive-paimon-conf: + +Configurations +************** + +If you are using HDFS, make sure that the environment variable HADOOP_HOME or HADOOP_CONF_DIR is set. + +Apache Paimon (Incubating) Operations +------------------ + +Apache Paimon (Incubating) only supports only reading table store tables through Hive. +A common scenario is to write data with Spark or Flink and read data with Hive. +You can follow this document `Apache Paimon (Incubating) Quick Start with Paimon Hive Catalog`_ to write data to a table which can also be accessed directly from Hive. +and then use Kyuubi Hive SQL engine to query the table with the following SQL ``SELECT`` statement. + +Taking ``Query Data`` as an example, + +.. code-block:: sql + + SELECT a, b FROM test_table ORDER BY a; + +Taking ``Query External Table`` as an example, + +.. code-block:: sql + + CREATE EXTERNAL TABLE external_test_table + STORED BY 'org.apache.paimon.hive.PaimonStorageHandler' + LOCATION '/path/to/table/store/warehouse/default.db/test_table'; + + SELECT a, b FROM test_table ORDER BY a; + +.. _Apache Paimon (Incubating): https://paimon.apache.org/ +.. _Official Documentation: https://paimon.apache.org/docs/master/ +.. _Apache Paimon (Incubating) Quick Start with Paimon Hive Catalog: https://paimon.apache.org/docs/master/engines/hive/#quick-start-with-paimon-hive-catalog +.. _Apache Paimon (Incubating) Supported Engines Hive: https://paimon.apache.org/docs/master/engines/hive/ +.. _Apache Paimon (Incubating) multi engine support: https://paimon.apache.org/docs/master/engines/overview/ diff --git a/docs/connector/spark/flink_table_store.rst b/docs/connector/spark/flink_table_store.rst deleted file mode 100644 index ee4c2b352c2..00000000000 --- a/docs/connector/spark/flink_table_store.rst +++ /dev/null @@ -1,90 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - -`Flink Table Store`_ -========== - -Flink Table Store is a unified storage to build dynamic tables for both streaming and batch processing in Flink, -supporting high-speed data ingestion and timely data query. - -.. tip:: - This article assumes that you have mastered the basic knowledge and operation of `Flink Table Store`_. - For the knowledge about Flink Table Store not mentioned in this article, - you can obtain it from its `Official Documentation`_. - -By using kyuubi, we can run SQL queries towards Flink Table Store which is more -convenient, easy to understand, and easy to expand than directly using -spark to manipulate Flink Table Store. - -Flink Table Store Integration -------------------- - -To enable the integration of kyuubi spark sql engine and Flink Table Store through -Apache Spark Datasource V2 and Catalog APIs, you need to: - -- Referencing the Flink Table Store :ref:`dependencies` -- Setting the spark extension and catalog :ref:`configurations` - -.. _spark-flink-table-store-deps: - -Dependencies -************ - -The **classpath** of kyuubi spark sql engine with Flink Table Store supported consists of - -1. kyuubi-spark-sql-engine-\ |release|\ _2.12.jar, the engine jar deployed with Kyuubi distributions -2. a copy of spark distribution -3. flink-table-store-spark-.jar (example: flink-table-store-spark-0.2.jar), which can be found in the `Maven Central`_ - -In order to make the Flink Table Store packages visible for the runtime classpath of engines, we can use one of these methods: - -1. Put the Flink Table Store packages into ``$SPARK_HOME/jars`` directly -2. Set ``spark.jars=/path/to/flink-table-store-spark`` - -.. warning:: - Please mind the compatibility of different Flink Table Store and Spark versions, which can be confirmed on the page of `Flink Table Store multi engine support`_. - -.. _spark-flink-table-store-conf: - -Configurations -************** - -To activate functionality of Flink Table Store, we can set the following configurations: - -.. code-block:: properties - - spark.sql.catalog.tablestore=org.apache.flink.table.store.spark.SparkCatalog - spark.sql.catalog.tablestore.warehouse=file:/tmp/warehouse - -Flink Table Store Operations ------------------- - -Flink Table Store supports reading table store tables through Spark. -A common scenario is to write data with Flink and read data with Spark. -You can follow this document `Flink Table Store Quick Start`_ to write data to a table store table -and then use kyuubi spark sql engine to query the table with the following SQL ``SELECT`` statement. - - -.. code-block:: sql - - select * from table_store.default.word_count; - - - -.. _Flink Table Store: https://nightlies.apache.org/flink/flink-table-store-docs-stable/ -.. _Flink Table Store Quick Start: https://nightlies.apache.org/flink/flink-table-store-docs-stable/docs/try-table-store/quick-start/ -.. _Official Documentation: https://nightlies.apache.org/flink/flink-table-store-docs-stable/ -.. _Maven Central: https://mvnrepository.com/artifact/org.apache.flink -.. _Flink Table Store multi engine support: https://nightlies.apache.org/flink/flink-table-store-docs-stable/docs/engines/overview/ diff --git a/docs/connector/spark/index.rst b/docs/connector/spark/index.rst index 790e804f268..d1503443c63 100644 --- a/docs/connector/spark/index.rst +++ b/docs/connector/spark/index.rst @@ -23,7 +23,7 @@ By default, it provides accessibility to hive warehouses with various file forma supported, such as parquet, orc, json, etc. Also,it can easily integrate with other third-party libraries, such as Hudi, -Iceberg, Delta Lake, Kudu, Flink Table Store, HBase,Cassandra, etc. +Iceberg, Delta Lake, Kudu, Apache Paimon (Incubating), HBase,Cassandra, etc. We also provide sample data sources like TDC-DS, TPC-H for testing and benchmarking purpose. @@ -37,7 +37,7 @@ purpose. iceberg kudu hive - flink_table_store + paimon tidb tpcds tpch diff --git a/docs/connector/spark/kudu.md b/docs/connector/spark/kudu.md index ca02eb95cd7..ce2d1e88cc1 100644 --- a/docs/connector/spark/kudu.md +++ b/docs/connector/spark/kudu.md @@ -1,19 +1,19 @@ +- Licensed to the Apache Software Foundation (ASF) under one or more +- contributor license agreements. See the NOTICE file distributed with +- this work for additional information regarding copyright ownership. +- The ASF licenses this file to You under the Apache License, Version 2.0 +- (the "License"); you may not use this file except in compliance with +- the License. You may obtain a copy of the License at +- +- http://www.apache.org/licenses/LICENSE-2.0 +- +- Unless required by applicable law or agreed to in writing, software +- distributed under the License is distributed on an "AS IS" BASIS, +- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- See the License for the specific language governing permissions and +- limitations under the License. +--> # Kudu @@ -26,6 +26,7 @@ When you are reading this documentation, we suppose that you are not necessary t Anything missing on this page about Apache Kudu background knowledge, you can refer to its official website. ## Why Kyuubi on Kudu + Basically, Kyuubi can take place of HiveServer2 as a multi tenant ad-hoc SQL on Hadoop solution, with the advantages of speed and power coming from Spark SQL. You can run SQL queries towards both data source and Hive tables whose data is secured only with computing resources you are authorized. > Spark SQL supports operating on a variety of data sources through the DataFrame interface. A DataFrame can be operated on using relational transformations and can also be used to create a temporary view. Registering a DataFrame as a temporary view allows you to run SQL queries over its data. This section describes the general methods for loading and saving data using the Spark Data Sources and then goes into specific options that are available for the built-in data sources. @@ -33,11 +34,13 @@ Basically, Kyuubi can take place of HiveServer2 as a multi tenant ad-hoc SQL on In Kyuubi, we can register Kudu tables and other data source tables as Spark temporary views to enable federated union queries across Hive, Kudu, and other data sources. ## Kudu Integration with Apache Spark + Before integrating Kyuubi with Kudu, we strongly suggest that you integrate and test Spark with Kudu first. You may find the guide from Kudu's online documentation -- [Kudu Integration with Spark](https://kudu.apache.org/docs/developing.html#_kudu_integration_with_spark) ## Kudu Integration with Kyuubi #### Install Kudu Spark Dependency + Confirm your Kudu cluster version and download the corresponding kudu spark dependency library, such as [org.apache.kudu:kudu-spark3_2.12-1.14.0](https://repo1.maven.org/maven2/org/apache/kudu/kudu-spark3_2.12/1.14.0/kudu-spark3_2.12-1.14.0.jar) to `$SPARK_HOME`/jars. #### Start Kyuubi @@ -97,7 +100,6 @@ options ( 5 rows selected (1.083 seconds) ``` - #### Join Kudu table with Hive table ```sql @@ -179,6 +181,7 @@ No rows selected (0.611 seconds) ``` ## References + [https://kudu.apache.org/](https://kudu.apache.org/) [https://kudu.apache.org/docs/developing.html#_kudu_integration_with_spark](https://kudu.apache.org/docs/developing.html#_kudu_integration_with_spark) [https://github.com/apache/kyuubi](https://github.com/apache/kyuubi) diff --git a/docs/connector/spark/paimon.rst b/docs/connector/spark/paimon.rst new file mode 100644 index 00000000000..14e74195503 --- /dev/null +++ b/docs/connector/spark/paimon.rst @@ -0,0 +1,110 @@ +.. Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +`Apache Paimon (Incubating)`_ +========== + +Apache Paimon(incubating) is a streaming data lake platform that supports high-speed data ingestion, change data tracking and efficient real-time analytics. + +.. tip:: + This article assumes that you have mastered the basic knowledge and operation of `Apache Paimon (Incubating)`_. + For the knowledge about Apache Paimon (Incubating) not mentioned in this article, + you can obtain it from its `Official Documentation`_. + +By using kyuubi, we can run SQL queries towards Apache Paimon (Incubating) which is more +convenient, easy to understand, and easy to expand than directly using +spark to manipulate Apache Paimon (Incubating). + +Apache Paimon (Incubating) Integration +------------------- + +To enable the integration of kyuubi spark sql engine and Apache Paimon (Incubating), you need to set the following configurations: + +- Referencing the Apache Paimon (Incubating) :ref:`dependencies` +- Setting the spark extension and catalog :ref:`configurations` + +.. _spark-paimon-deps: + +Dependencies +************ + +The **classpath** of kyuubi spark sql engine with Apache Paimon (Incubating) consists of + +1. kyuubi-spark-sql-engine-\ |release|\ _2.12.jar, the engine jar deployed with Kyuubi distributions +2. a copy of spark distribution +3. paimon-spark-.jar (example: paimon-spark-3.3-0.4-20230323.002035-5.jar), which can be found in the `Apache Paimon (Incubating) Supported Engines Spark3`_ + +In order to make the Apache Paimon (Incubating) packages visible for the runtime classpath of engines, we can use one of these methods: + +1. Put the Apache Paimon (Incubating) packages into ``$SPARK_HOME/jars`` directly +2. Set ``spark.jars=/path/to/paimon-spark-.jar`` + +.. warning:: + Please mind the compatibility of different Apache Paimon (Incubating) and Spark versions, which can be confirmed on the page of `Apache Paimon (Incubating) multi engine support`_. + +.. _spark-paimon-conf: + +Configurations +************** + +To activate functionality of Apache Paimon (Incubating), we can set the following configurations: + +.. code-block:: properties + + spark.sql.catalog.paimon=org.apache.paimon.spark.SparkCatalog + spark.sql.catalog.paimon.warehouse=file:/tmp/paimon + +Apache Paimon (Incubating) Operations +------------------ + + +Taking ``CREATE NAMESPACE`` as a example, + +.. code-block:: sql + + CREATE DATABASE paimon.default; + USE paimon.default; + +Taking ``CREATE TABLE`` as a example, + +.. code-block:: sql + + create table my_table ( + k int, + v string + ) tblproperties ( + 'primary-key' = 'k' + ); + +Taking ``SELECT`` as a example, + +.. code-block:: sql + + SELECT * FROM my_table; + + +Taking ``INSERT`` as a example, + +.. code-block:: sql + + INSERT INTO my_table VALUES (1, 'Hi Again'), (3, 'Test'); + + + + +.. _Apache Paimon (Incubating): https://paimon.apache.org/ +.. _Official Documentation: https://paimon.apache.org/docs/master/ +.. _Apache Paimon (Incubating) Supported Engines Spark3: https://paimon.apache.org/docs/master/engines/spark3/ +.. _Apache Paimon (Incubating) multi engine support: https://paimon.apache.org/docs/master/engines/overview/ diff --git a/docs/connector/trino/flink_table_store.rst b/docs/connector/trino/flink_table_store.rst deleted file mode 100644 index 8dd0c4061f8..00000000000 --- a/docs/connector/trino/flink_table_store.rst +++ /dev/null @@ -1,94 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - -`Flink Table Store`_ -========== - -Flink Table Store is a unified storage to build dynamic tables for both streaming and batch processing in Flink, -supporting high-speed data ingestion and timely data query. - -.. tip:: - This article assumes that you have mastered the basic knowledge and operation of `Flink Table Store`_. - For the knowledge about Flink Table Store not mentioned in this article, - you can obtain it from its `Official Documentation`_. - -By using kyuubi, we can run SQL queries towards Flink Table Store which is more -convenient, easy to understand, and easy to expand than directly using -trino to manipulate Flink Table Store. - -Flink Table Store Integration -------------------- - -To enable the integration of kyuubi trino sql engine and Flink Table Store, you need to: - -- Referencing the Flink Table Store :ref:`dependencies` -- Setting the trino extension and catalog :ref:`configurations` - -.. _trino-flink-table-store-deps: - -Dependencies -************ - -The **classpath** of kyuubi trino sql engine with Flink Table Store supported consists of - -1. kyuubi-trino-sql-engine-\ |release|\ _2.12.jar, the engine jar deployed with Kyuubi distributions -2. a copy of trino distribution -3. flink-table-store-trino-.jar (example: flink-table-store-trino-0.2.jar), which code can be found in the `Source Code`_ -4. flink-shaded-hadoop-2-uber-2.8.3-10.0.jar, which code can be found in the `Pre-bundled Hadoop 2.8.3`_ - -In order to make the Flink Table Store packages visible for the runtime classpath of engines, we can use these methods: - -1. Build the flink-table-store-trino-.jar by reference to `Flink Table Store Trino README`_ -2. Put the flink-table-store-trino-.jar and flink-shaded-hadoop-2-uber-2.8.3-10.0.jar packages into ``$TRINO_SERVER_HOME/plugin/tablestore`` directly - -.. warning:: - Please mind the compatibility of different Flink Table Store and Trino versions, which can be confirmed on the page of `Flink Table Store multi engine support`_. - -.. _trino-flink-table-store-conf: - -Configurations -************** - -To activate functionality of Flink Table Store, we can set the following configurations: - -Catalogs are registered by creating a catalog properties file in the $TRINO_SERVER_HOME/etc/catalog directory. -For example, create $TRINO_SERVER_HOME/etc/catalog/tablestore.properties with the following contents to mount the tablestore connector as the tablestore catalog: - -.. code-block:: properties - - connector.name=tablestore - warehouse=file:///tmp/warehouse - -Flink Table Store Operations ------------------- - -Flink Table Store supports reading table store tables through Trino. -A common scenario is to write data with Flink and read data with Trino. -You can follow this document `Flink Table Store Quick Start`_ to write data to a table store table -and then use kyuubi trino sql engine to query the table with the following SQL ``SELECT`` statement. - - -.. code-block:: sql - - SELECT * FROM tablestore.default.t1 - - -.. _Flink Table Store: https://nightlies.apache.org/flink/flink-table-store-docs-stable/ -.. _Flink Table Store Quick Start: https://nightlies.apache.org/flink/flink-table-store-docs-stable/docs/try-table-store/quick-start/ -.. _Official Documentation: https://nightlies.apache.org/flink/flink-table-store-docs-stable/ -.. _Source Code: https://github.com/JingsongLi/flink-table-store-trino -.. _Flink Table Store multi engine support: https://nightlies.apache.org/flink/flink-table-store-docs-stable/docs/engines/overview/ -.. _Pre-bundled Hadoop 2.8.3: https://repo.maven.apache.org/maven2/org/apache/flink/flink-shaded-hadoop-2-uber/2.8.3-10.0/flink-shaded-hadoop-2-uber-2.8.3-10.0.jar -.. _Flink Table Store Trino README: https://github.com/JingsongLi/flink-table-store-trino#readme diff --git a/docs/connector/trino/hudi.rst b/docs/connector/trino/hudi.rst new file mode 100644 index 00000000000..5c965a0b64b --- /dev/null +++ b/docs/connector/trino/hudi.rst @@ -0,0 +1,80 @@ +.. Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +`Hudi`_ +======== + +Apache Hudi (pronounced “hoodie”) is the next generation streaming data lake platform. +Apache Hudi brings core warehouse and database functionality directly to a data lake. + +.. tip:: + This article assumes that you have mastered the basic knowledge and operation of `Hudi`_. + For the knowledge about Hudi not mentioned in this article, + you can obtain it from its `Official Documentation`_. + +By using Kyuubi, we can run SQL queries towards Hudi which is more convenient, easy to understand, +and easy to expand than directly using Trino to manipulate Hudi. + +Hudi Integration +---------------- + +To enable the integration of Kyuubi Trino SQL engine and Hudi, you need to: + +- Setting the Trino extension and catalog :ref:`configurations` + +.. _trino-hudi-conf: + +Configurations +************** + +Catalogs are registered by creating a file of catalog properties in the `$TRINO_SERVER_HOME/etc/catalog` directory. +For example, we can create a `$TRINO_SERVER_HOME/etc/catalog/hudi.properties` with the following contents to mount the Hudi connector as a Hudi catalog: + +.. code-block:: properties + + connector.name=hudi + hive.metastore.uri=thrift://example.net:9083 + +Note: You need to replace $TRINO_SERVER_HOME above to your Trino server home path like `/opt/trino-server-406`. + +More configuration properties can be found in the `Hudi connector in Trino document`_. + +.. tip:: + Trino version 398 or higher, it is recommended to use the Hudi connector. + You don't need to install any dependencies in version 398 or higher. + +Hudi Operations +--------------- +The globally available and read operation statements are supported in Trino. +These statements can be found in `Trino SQL Support`_. +Currently, Trino cannot write data to a Hudi table. +A common scenario is to write data with Spark/Flink and read data with Trino. +You can use the Kyuubi Trino SQL engine to query the table with the following SQL ``SELECT`` statement. + +Taking ``Query Data`` as a example, + +.. code-block:: sql + + USE example.example_schema; + + SELECT symbol, max(ts) + FROM stock_ticks_cow + GROUP BY symbol + HAVING symbol = 'GOOG'; + +.. _Hudi: https://hudi.apache.org/ +.. _Official Documentation: https://hudi.apache.org/docs/overview +.. _Hudi connector in Trino document: https://trino.io/docs/current/connector/hudi.html +.. _Trino SQL Support: https://trino.io/docs/current/language/sql-support.html# diff --git a/docs/connector/trino/index.rst b/docs/connector/trino/index.rst index a5c5675ce70..290966a5cf7 100644 --- a/docs/connector/trino/index.rst +++ b/docs/connector/trino/index.rst @@ -19,5 +19,6 @@ Connectors For Trino SQL Engine .. toctree:: :maxdepth: 2 - flink_table_store + paimon + hudi iceberg \ No newline at end of file diff --git a/docs/connector/trino/paimon.rst b/docs/connector/trino/paimon.rst new file mode 100644 index 00000000000..5ac892234f8 --- /dev/null +++ b/docs/connector/trino/paimon.rst @@ -0,0 +1,92 @@ +.. Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +`Apache Paimon (Incubating)`_ +========== + +Apache Paimon(incubating) is a streaming data lake platform that supports high-speed data ingestion, change data tracking and efficient real-time analytics. + +.. tip:: + This article assumes that you have mastered the basic knowledge and operation of `Apache Paimon (Incubating)`_. + For the knowledge about Apache Paimon (Incubating) not mentioned in this article, + you can obtain it from its `Official Documentation`_. + +By using kyuubi, we can run SQL queries towards Apache Paimon (Incubating) which is more +convenient, easy to understand, and easy to expand than directly using +trino to manipulate Apache Paimon (Incubating). + +Apache Paimon (Incubating) Integration +------------------- + +To enable the integration of kyuubi trino sql engine and Apache Paimon (Incubating), you need to: + +- Referencing the Apache Paimon (Incubating) :ref:`dependencies` +- Setting the trino extension and catalog :ref:`configurations` + +.. _trino-paimon-deps: + +Dependencies +************ + +The **classpath** of kyuubi trino sql engine with Apache Paimon (Incubating) supported consists of + +1. kyuubi-trino-sql-engine-\ |release|\ _2.12.jar, the engine jar deployed with Kyuubi distributions +2. a copy of trino distribution +3. paimon-trino-.jar (example: paimon-trino-0.2.jar), which code can be found in the `Source Code`_ +4. flink-shaded-hadoop-2-uber-.jar, which code can be found in the `Pre-bundled Hadoop`_ + +In order to make the Apache Paimon (Incubating) packages visible for the runtime classpath of engines, you need to: + +1. Build the paimon-trino-.jar by reference to `Apache Paimon (Incubating) Trino README`_ +2. Put the paimon-trino-.jar and flink-shaded-hadoop-2-uber-.jar packages into ``$TRINO_SERVER_HOME/plugin/tablestore`` directly + +.. warning:: + Please mind the compatibility of different Apache Paimon (Incubating) and Trino versions, which can be confirmed on the page of `Apache Paimon (Incubating) multi engine support`_. + +.. _trino-paimon-conf: + +Configurations +************** + +To activate functionality of Apache Paimon (Incubating), we can set the following configurations: + +Catalogs are registered by creating a catalog properties file in the $TRINO_SERVER_HOME/etc/catalog directory. +For example, create $TRINO_SERVER_HOME/etc/catalog/tablestore.properties with the following contents to mount the tablestore connector as the tablestore catalog: + +.. code-block:: properties + + connector.name=tablestore + warehouse=file:///tmp/warehouse + +Apache Paimon (Incubating) Operations +------------------ + +Apache Paimon (Incubating) supports reading table store tables through Trino. +A common scenario is to write data with Spark or Flink and read data with Trino. +You can follow this document `Apache Paimon (Incubating) Engines Flink Quick Start`_ to write data to a table store table +and then use kyuubi trino sql engine to query the table with the following SQL ``SELECT`` statement. + + +.. code-block:: sql + + SELECT * FROM tablestore.default.t1 + +.. _Apache Paimon (Incubating): https://paimon.apache.org/ +.. _Apache Paimon (Incubating) multi engine support: https://paimon.apache.org/docs/master/engines/overview/ +.. _Apache Paimon (Incubating) Engines Flink Quick Start: https://paimon.apache.org/docs/master/engines/flink/#quick-start +.. _Official Documentation: https://paimon.apache.org/docs/master/ +.. _Source Code: https://github.com/JingsongLi/paimon-trino +.. _Pre-bundled Hadoop: https://flink.apache.org/downloads/#additional-components +.. _Apache Paimon (Incubating) Trino README: https://github.com/JingsongLi/paimon-trino#readme diff --git a/docs/deployment/engine_lifecycle.md b/docs/deployment/engine_lifecycle.md index 35944fa232e..63b1a80a233 100644 --- a/docs/deployment/engine_lifecycle.md +++ b/docs/deployment/engine_lifecycle.md @@ -1,19 +1,19 @@ +- Licensed to the Apache Software Foundation (ASF) under one or more +- contributor license agreements. See the NOTICE file distributed with +- this work for additional information regarding copyright ownership. +- The ASF licenses this file to You under the Apache License, Version 2.0 +- (the "License"); you may not use this file except in compliance with +- the License. You may obtain a copy of the License at +- +- http://www.apache.org/licenses/LICENSE-2.0 +- +- Unless required by applicable law or agreed to in writing, software +- distributed under the License is distributed on an "AS IS" BASIS, +- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- See the License for the specific language governing permissions and +- limitations under the License. +--> # The TTL Of Kyuubi Engines @@ -26,7 +26,6 @@ To better improve the overall resource utilization of the cluster, - The time to wait for the resource to be allocated, such as the scheduling delay, the start/stop cost. - A longer time-to-live(TTL) for allocated resources can significantly reduce such time costs within an application. - - The time being idle of the resource. - A shorter time to live for allocated resources can make all resources in rapid turnarounds across applications. @@ -45,7 +44,7 @@ To better improve the overall resource utilization of the cluster, ### Engine TTL -| Key | Default | Meaning | Type | Since | +| Key | Default | Meaning | Type | Since | |----------------------------------------------|--------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------------------------------|--------------------------------------| | kyuubi\.session\.engine
\.check\.interval |
PT5M
|
The check interval for engine timeout
|
duration
|
1.0.0
| | kyuubi\.session\.engine
\.idle\.timeout |
PT30M
|
engine timeout, the engine will self-terminate when it's not accessed for this duration. 0 or negative means not to self-terminate.
|
duration
|
1.0.0
| diff --git a/docs/deployment/engine_on_kubernetes.md b/docs/deployment/engine_on_kubernetes.md index 6f3e73a4a43..44fca1602e3 100644 --- a/docs/deployment/engine_on_kubernetes.md +++ b/docs/deployment/engine_on_kubernetes.md @@ -1,20 +1,19 @@ - +- Licensed to the Apache Software Foundation (ASF) under one or more +- contributor license agreements. See the NOTICE file distributed with +- this work for additional information regarding copyright ownership. +- The ASF licenses this file to You under the Apache License, Version 2.0 +- (the "License"); you may not use this file except in compliance with +- the License. You may obtain a copy of the License at +- +- http://www.apache.org/licenses/LICENSE-2.0 +- +- Unless required by applicable law or agreed to in writing, software +- distributed under the License is distributed on an "AS IS" BASIS, +- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- See the License for the specific language governing permissions and +- limitations under the License. +--> # Deploy Kyuubi engines on Kubernetes @@ -22,7 +21,7 @@ When you want to run Kyuubi's Spark SQL engines on Kubernetes, you'd better have cognition upon the following things. -* Read about [Running Spark On Kubernetes](http://spark.apache.org/docs/latest/running-on-kubernetes.html) +* Read about [Running Spark On Kubernetes](https://spark.apache.org/docs/latest/running-on-kubernetes.html) * An active Kubernetes cluster * [Kubectl](https://kubernetes.io/docs/reference/kubectl/overview/) * KubeConfig of the target cluster @@ -98,7 +97,7 @@ As it known to us all, Kubernetes can use configurations to mount volumes into d * persistentVolumeClaim: mounts a PersistentVolume into a pod. Note: Please -see [the Security section of this document](http://spark.apache.org/docs/latest/running-on-kubernetes.html#security) for security issues related to volume mounts. +see [the Security section of this document](https://spark.apache.org/docs/latest/running-on-kubernetes.html#security) for security issues related to volume mounts. ``` spark.kubernetes.driver.volumes...options.path= @@ -108,7 +107,7 @@ spark.kubernetes.executor.volumes...options.path= spark.kubernetes.executor.volumes...mount.path= ``` -Read [Using Kubernetes Volumes](http://spark.apache.org/docs/latest/running-on-kubernetes.html#using-kubernetes-volumes) for more about volumes. +Read [Using Kubernetes Volumes](https://spark.apache.org/docs/latest/running-on-kubernetes.html#using-kubernetes-volumes) for more about volumes. ### PodTemplateFile @@ -118,4 +117,4 @@ To do so, specify the spark properties `spark.kubernetes.driver.podTemplateFile` ### Other -You can read Spark's official documentation for [Running on Kubernetes](http://spark.apache.org/docs/latest/running-on-kubernetes.html) for more information. \ No newline at end of file +You can read Spark's official documentation for [Running on Kubernetes](https://spark.apache.org/docs/latest/running-on-kubernetes.html) for more information. diff --git a/docs/deployment/engine_on_yarn.md b/docs/deployment/engine_on_yarn.md index 54f8b508f0f..6812afa46db 100644 --- a/docs/deployment/engine_on_yarn.md +++ b/docs/deployment/engine_on_yarn.md @@ -1,20 +1,19 @@ - +- Licensed to the Apache Software Foundation (ASF) under one or more +- contributor license agreements. See the NOTICE file distributed with +- this work for additional information regarding copyright ownership. +- The ASF licenses this file to You under the Apache License, Version 2.0 +- (the "License"); you may not use this file except in compliance with +- the License. You may obtain a copy of the License at +- +- http://www.apache.org/licenses/LICENSE-2.0 +- +- Unless required by applicable law or agreed to in writing, software +- distributed under the License is distributed on an "AS IS" BASIS, +- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- See the License for the specific language governing permissions and +- limitations under the License. +--> # Deploy Kyuubi engines on Yarn @@ -24,11 +23,11 @@ When you want to deploy Kyuubi's Spark SQL engines on YARN, you'd better have cognition upon the following things. -- Knowing the basics about [Running Spark on YARN](http://spark.apache.org/docs/latest/running-on-yarn.html) +- Knowing the basics about [Running Spark on YARN](https://spark.apache.org/docs/latest/running-on-yarn.html) - A binary distribution of Spark which is built with YARN support - You can use the built-in Spark distribution - You can get it from [Spark official website](https://spark.apache.org/downloads.html) directly - - You can [Build Spark](http://spark.apache.org/docs/latest/building-spark.html#specifying-the-hadoop-version-and-enabling-yarn) with `-Pyarn` maven option + - You can [Build Spark](https://spark.apache.org/docs/latest/building-spark.html#specifying-the-hadoop-version-and-enabling-yarn) with `-Pyarn` maven option - An active [Apache Hadoop YARN](https://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/YARN.html) cluster - An active Apache Hadoop HDFS cluster - Setup Hadoop client configurations at the machine the Kyuubi server locates @@ -40,6 +39,7 @@ When you want to deploy Kyuubi's Spark SQL engines on YARN, you'd better have co Either `HADOOP_CONF_DIR` or `YARN_CONF_DIR` is configured and points to the Hadoop client configurations directory, usually, `$HADOOP_HOME/etc/hadoop`. If the `HADOOP_CONF_DIR` points the YARN and HDFS cluster correctly, you should be able to run the `SparkPi` example on YARN. + ```bash $ HADOOP_CONF_DIR=/path/to/hadoop/conf $SPARK_HOME/bin/spark-submit \ --class org.apache.spark.examples.SparkPi \ @@ -81,34 +81,34 @@ the QUEUE configured at Kyuubi server side will be used as default. Pass the configurations below through the JDBC connection string to set how many instances of Spark executor will be used and how many cpus and memory will Spark driver, ApplicationMaster and each executor take. -Name | Default | Meaning ---- | --- | --- -spark.executor.instances | 1 | The number of executors for static allocation -spark.executor.cores | 1 | The number of cores to use on each executor -spark.yarn.am.memory | 512m | Amount of memory to use for the YARN Application Master in client mode -spark.yarn.am.memoryOverhead | amMemory * 0.10, with minimum of 384 | Amount of non-heap memory to be allocated per am process in client mode -spark.driver.memory | 1g | Amount of memory to use for the driver process -spark.driver.memoryOverhead | driverMemory * 0.10, with minimum of 384 | Amount of non-heap memory to be allocated per driver process in cluster mode -spark.executor.memory | 1g | Amount of memory to use for the executor process -spark.executor.memoryOverhead | executorMemory * 0.10, with minimum of 384 | Amount of additional memory to be allocated per executor process. This is memory that accounts for things like VM overheads, interned strings other native overheads, etc - -It is recommended to use [Dynamic Allocation](http://spark.apache.org/docs/3.0.1/configuration.html#dynamic-allocation) with Kyuubi, +| Name | Default | Meaning | +|-------------------------------|--------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| spark.executor.instances | 1 | The number of executors for static allocation | +| spark.executor.cores | 1 | The number of cores to use on each executor | +| spark.yarn.am.memory | 512m | Amount of memory to use for the YARN Application Master in client mode | +| spark.yarn.am.memoryOverhead | amMemory * 0.10, with minimum of 384 | Amount of non-heap memory to be allocated per am process in client mode | +| spark.driver.memory | 1g | Amount of memory to use for the driver process | +| spark.driver.memoryOverhead | driverMemory * 0.10, with minimum of 384 | Amount of non-heap memory to be allocated per driver process in cluster mode | +| spark.executor.memory | 1g | Amount of memory to use for the executor process | +| spark.executor.memoryOverhead | executorMemory * 0.10, with minimum of 384 | Amount of additional memory to be allocated per executor process. This is memory that accounts for things like VM overheads, interned strings other native overheads, etc | + +It is recommended to use [Dynamic Allocation](https://spark.apache.org/docs/3.0.1/configuration.html#dynamic-allocation) with Kyuubi, since the SQL engine will be long-running for a period, execute user's queries from clients periodically, and the demand for computing resources is not the same for those queries. -It is better for Spark to release some executors when either the query is lightweight, or the SQL engine is being idled. +It is better for Spark to release some executors when either the query is lightweight, or the SQL engine is being idled. ##### Tuning You can specify `spark.yarn.archive` or `spark.yarn.jars` to point to a world-readable location that contains Spark jars on HDFS, -which allows YARN to cache it on nodes so that it doesn't need to be distributed each time an application runs. +which allows YARN to cache it on nodes so that it doesn't need to be distributed each time an application runs. ##### Others -Please refer to [Spark properties](http://spark.apache.org/docs/latest/running-on-yarn.html#spark-properties) to check other acceptable configs. +Please refer to [Spark properties](https://spark.apache.org/docs/latest/running-on-yarn.html#spark-properties) to check other acceptable configs. ### Kerberos -Kyuubi currently does not support Spark's [YARN-specific Kerberos Configuration](http://spark.apache.org/docs/3.0.1/running-on-yarn.html#kerberos), +Kyuubi currently does not support Spark's [YARN-specific Kerberos Configuration](https://spark.apache.org/docs/3.0.1/running-on-yarn.html#kerberos), so `spark.kerberos.keytab` and `spark.kerberos.principal` should not use now. Instead, you can schedule a periodically `kinit` process via `crontab` task on the local machine that hosts Kyuubi server or simply use [Kyuubi Kinit](settings.html#kinit). @@ -142,6 +142,7 @@ yarn.application.id: application_00000000XX_00XX Either `HADOOP_CONF_DIR` or `YARN_CONF_DIR` is configured and points to the Hadoop client configurations directory, usually, `$HADOOP_HOME/etc/hadoop`. If the `HADOOP_CONF_DIR` points to the YARN and HDFS cluster correctly, and the `HADOOP_CLASSPATH` environment variable is set, you can launch a Flink on YARN session, and submit an example job: + ```bash # we assume to be in the root directory of # the unzipped Flink distribution @@ -162,7 +163,7 @@ export HADOOP_CLASSPATH=`hadoop classpath` # (4) Stop YARN session (replace the application id based # on the output of the yarn-session.sh command) echo "stop" | ./bin/yarn-session.sh -id application_XXXXX_XXX - ``` +``` If the `TopSpeedWindowing` passes, configure it in `$KYUUBI_HOME/conf/kyuubi-env.sh` @@ -174,9 +175,9 @@ $ echo "export HADOOP_CONF_DIR=/path/to/hadoop/conf" >> $KYUUBI_HOME/conf/kyuubi The `FLINK_HADOOP_CLASSPATH` is required, too. -For users who are using Hadoop 3.x, Hadoop shaded client is recommended instead of Hadoop vanilla jars. -For users who are using Hadoop 2.x, `FLINK_HADOOP_CLASSPATH` should be set to hadoop classpath to use Hadoop -vanilla jars. For users which does not use Hadoop services, e.g. HDFS, YARN at all, Hadoop client jars +For users who are using Hadoop 3.x, Hadoop shaded client is recommended instead of Hadoop vanilla jars. +For users who are using Hadoop 2.x, `FLINK_HADOOP_CLASSPATH` should be set to hadoop classpath to use Hadoop +vanilla jars. For users which does not use Hadoop services, e.g. HDFS, YARN at all, Hadoop client jars is also required, and recommend to use Hadoop shaded client as Hadoop 3.x's users do. See [HADOOP-11656](https://issues.apache.org/jira/browse/HADOOP-11656) for details of Hadoop shaded client. @@ -186,11 +187,13 @@ To use Hadoop shaded client, please configure $KYUUBI_HOME/conf/kyuubi-env.sh as ```bash $ echo "export FLINK_HADOOP_CLASSPATH=/path/to/hadoop-client-runtime-3.3.2.jar:/path/to/hadoop-client-api-3.3.2.jar" >> $KYUUBI_HOME/conf/kyuubi-env.sh ``` + To use Hadoop vanilla jars, please configure $KYUUBI_HOME/conf/kyuubi-env.sh as follows: ```bash $ echo "export FLINK_HADOOP_CLASSPATH=`hadoop classpath`" >> $KYUUBI_HOME/conf/kyuubi-env.sh ``` + ### Deployment Modes Supported by Flink on YARN For experiment use, we recommend deploying Kyuubi Flink SQL engine in [Session Mode](https://nightlies.apache.org/flink/flink-docs-stable/docs/deployment/resource-providers/yarn/#session-mode). @@ -240,11 +243,11 @@ If the `Hive SQL` passes and there is a job in Yarn Web UI, It indicates the hiv #### Required Environment Variable -The `HIVE_HADOOP_CLASSPATH` is required, too. It should contain `commons-collections-*.jar`, +The `HIVE_HADOOP_CLASSPATH` is required, too. It should contain `commons-collections-*.jar`, `hadoop-client-runtime-*.jar`, `hadoop-client-api-*.jar` and `htrace-core4-*.jar`. -All four jars are in the `HADOOP_HOME`. +All four jars are in the `HADOOP_HOME`. -For example, in Hadoop 3.1.0 version, the following is their location. +For example, in Hadoop 3.1.0 version, the following is their location. - `${HADOOP_HOME}/share/hadoop/common/lib/commons-collections-3.2.2.jar` - `${HADOOP_HOME}/share/hadoop/client/hadoop-client-runtime-3.1.0.jar` - `${HADOOP_HOME}/share/hadoop/client/hadoop-client-api-3.1.0.jar` @@ -256,3 +259,4 @@ Configure them in `$KYUUBI_HOME/conf/kyuubi-env.sh` or `$HIVE_HOME/conf/hive-env $ echo "export HADOOP_CONF_DIR=/path/to/hadoop/conf" >> $KYUUBI_HOME/conf/kyuubi-env.sh $ echo "export HIVE_HADOOP_CLASSPATH=${HADOOP_HOME}/share/hadoop/common/lib/commons-collections-3.2.2.jar:${HADOOP_HOME}/share/hadoop/client/hadoop-client-runtime-3.1.0.jar:${HADOOP_HOME}/share/hadoop/client/hadoop-client-api-3.1.0.jar:${HADOOP_HOME}/share/hadoop/common/lib/htrace-core4-4.1.0-incubating.jar" >> $KYUUBI_HOME/conf/kyuubi-env.sh ``` + diff --git a/docs/deployment/engine_share_level.md b/docs/deployment/engine_share_level.md index 2272c9d1ced..4a7b680cb4e 100644 --- a/docs/deployment/engine_share_level.md +++ b/docs/deployment/engine_share_level.md @@ -1,20 +1,19 @@ - +- Licensed to the Apache Software Foundation (ASF) under one or more +- contributor license agreements. See the NOTICE file distributed with +- this work for additional information regarding copyright ownership. +- The ASF licenses this file to You under the Apache License, Version 2.0 +- (the "License"); you may not use this file except in compliance with +- the License. You may obtain a copy of the License at +- +- http://www.apache.org/licenses/LICENSE-2.0 +- +- Unless required by applicable law or agreed to in writing, software +- distributed under the License is distributed on an "AS IS" BASIS, +- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- See the License for the specific language governing permissions and +- limitations under the License. +--> # The Share Level Of Kyuubi Engines @@ -32,9 +31,9 @@ Using Spark to process data is like driving an all-wheel-drive hefty horsepower However, - Cars have their limit of 0-60 times. -In a similar way, all Spark applications also have to warm up before go full speed. + In a similar way, all Spark applications also have to warm up before go full speed. - Cars have a constant number of seats and are not allowed to be overloaded. -Due to the master-slave architecture of Spark and the resource configured ahead, the overall workload of a single application is predictable. + Due to the master-slave architecture of Spark and the resource configured ahead, the overall workload of a single application is predictable. - Cars have various shapes to meet our needs. With this feature, Kyuubi give you a more flexible way to handle different big data workloads. @@ -43,15 +42,15 @@ With this feature, Kyuubi give you a more flexible way to handle different big d The current supported share levels are, -| Share Level | Syntax | Scenario | Isolation Degree | Sharability | -| --- | --- | ---- | --- | --- | -| **CONNECTION** | One engine per session | Large-scale ETL
Ad hoc | High | Low | -| **USER** | One engine per user | Ad hoc
Small-scale ETL | Medium | Medium| -| **GROUP** | One engine per primary group | Ad hoc
Small-scale ETL | Low | High | -| **SERVER**| One engine per cluster | Admin | Highest If Secured
Lowest If Unsecured | Admin ONLY If Secured | +| Share Level | Syntax | Scenario | Isolation Degree | Shareability | +|----------------|------------------------------|------------------------------|----------------------------------------------|-----------------------| +| **CONNECTION** | One engine per session | Large-scale ETL
Ad hoc | High | Low | +| **USER** | One engine per user | Ad hoc
Small-scale ETL | Medium | Medium | +| **GROUP** | One engine per primary group | Ad hoc
Small-scale ETL | Low | High | +| **SERVER** | One engine per cluster | Admin | Highest If Secured
Lowest If Unsecured | Admin ONLY If Secured | - Better isolation degree of engines gives us better stability of an engine and the query executions running on it. -- Better sharability of engines means we are more likely to reuse an engine which is already in full speed. +- Better shareability of engines means we are more likely to reuse an engine which is already in full speed. ### CONNECTION @@ -79,6 +78,7 @@ When closing session, the corresponding engine will be shutdown at the same time
*Figure.2 USER Share Level* +
All sessions with USER share level use the same engine if and only if the session user is the same. @@ -102,7 +102,6 @@ This TTL allows new sessions to be established quickly without waiting for the e - An engine will be shared by all sessions created by all users belong to the same primary group name. The engine will be launched by the group name as the effective username, so here the group name is kind of special user who is able to visit the compute resources/data of a team. It follows the [Hadoop GroupsMapping](https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/GroupsMapping.html) to map user to a primary group. If the primary group is not found, it falls back to the USER level. @@ -138,14 +137,14 @@ The `kyuubi.engine.share.level.subdomain` shall be configured in the JDBC connec ### Hybrid -All supported share levels can be used together in a single Kyuubi server or cluster. +All supported share levels can be used together in a single Kyuubi server or cluster. ## Related Configurations - kyuubi.engine.share.level(kyuubi.session.engine.share.level) - Default: USER - Candidates: USER, CONNECTION, GROUP, SERVER - - Meaning: The base level for how an engine is created, cached and shared to sessions. + - Meaning: The base level for how an engine is created, cached and shared to sessions. - Usage: It can be set both in the server configuration file and also connection URL. The latter has higher priority. - kyuubi.session.engine.idle.timeout - Default: PT30M (30 min) @@ -160,4 +159,4 @@ All supported share levels can be used together in a single Kyuubi server or clu ## Conclusion -With This feature, end-users are able to leverage engines in different ways to handle their different workloads, such as large-scale ETL jobs and interactive ad hoc queries. +With this feature, end-users are able to leverage engines in different ways to handle their different workloads, such as large-scale ETL jobs and interactive ad hoc queries. diff --git a/docs/deployment/high_availability_guide.md b/docs/deployment/high_availability_guide.md index 0189432d65f..353e549ebba 100644 --- a/docs/deployment/high_availability_guide.md +++ b/docs/deployment/high_availability_guide.md @@ -1,20 +1,19 @@ - +- Licensed to the Apache Software Foundation (ASF) under one or more +- contributor license agreements. See the NOTICE file distributed with +- this work for additional information regarding copyright ownership. +- The ASF licenses this file to You under the Apache License, Version 2.0 +- (the "License"); you may not use this file except in compliance with +- the License. You may obtain a copy of the License at +- +- http://www.apache.org/licenses/LICENSE-2.0 +- +- Unless required by applicable law or agreed to in writing, software +- distributed under the License is distributed on an "AS IS" BASIS, +- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- See the License for the specific language governing permissions and +- limitations under the License. +--> # Kyuubi High Availability Guide @@ -22,20 +21,19 @@ As an enterprise-class ad-hoc SQL query service built on top of [Apache Spark](h Running Kyuubi in HA mode is to use groups of computers or containers that support SQL query service on Kyuubi that can be reliably utilized with a minimum amount of down-time. Kyuubi operates by using [Apache ZooKeeper](https://zookeeper.apache.org/) to harness redundant service instances in groups that provide continuous service when one or more components fail. -Without HA, if a server crashes, Kyuubi will be unavailable until the crashed server is fixed. With HA, this situation will be remedied by hardware/software faults auto-detecting, and immediately another Kyuubi service instance will be ready to serve without requiring human intervention. +Without HA, if a server crashes, Kyuubi will be unavailable until the crashed server is fixed. With HA, this situation will be remedied by hardware/software faults auto-detecting, and immediately another Kyuubi service instance will be ready to serve without requiring human intervention. ## HA Architecture Currently, Kyuubi supports load balancing to make the whole system highly available. Load balancing aims to optimize all Kyuubi service unit's usage, maximize throughput, minimize response time, and avoid overload of a single unit. -Using multiple Kyuubi service units with load balancing instead of a single unit may increase reliability and availability through redundancy. +Using multiple Kyuubi service units with load balancing instead of a single unit may increase reliability and availability through redundancy.
- ### Key Benefits - High concurrency @@ -46,7 +44,6 @@ Using multiple Kyuubi service units with load balancing instead of a single unit After all connection are released, it stops then. - The dependencies of Kyuubi engines are free to change, such as bump up versions, modify configurations, add external jars, relocate to another engine home. Everything will be reloaded during start and stop. - ## System-side Deployment When applying HA to Kyuubi deployment, we need to be aware of the below two thing basically, @@ -67,7 +64,6 @@ But it doesn't have any availability to being highly available. For production deployment purpose, an external zookeeper cluster is required for `kyuubi.ha.zookeeper.quorum`. In this mode, multiple `k.i.`s can be registered to the same ServerSpace configured by `kyuubi.ha.zookeeper.namespace` and serve together. - ## Client-side Usage With [Kyuubi Hive JDBC Driver](https://mvnrepository.com/artifact/org.apache.kyuubi/kyuubi-hive-jdbc) or vanilla Hive JDBC Driver, a client can specify service discovery mode in JDBC connection string, i.e. `serviceDiscoveryMode=zooKeeper;` and set `zooKeeperNamespace=kyuubi;`, then it can randomly pick one of the Kyuubi service uris from the specified ZooKeeper addresses in the `/kyuubi` path. @@ -82,19 +78,19 @@ bin/beeline -u 'jdbc:hive2://10.242.189.214:2181/;serviceDiscoveryMode=zooKeeper Kyuubi supports hot upgrade one of server in a HA cluster which is transparent to users. -- If you have specified a custom port for Kyuubi server +- If you have specified a custom port for Kyuubi server For example, the Kyuubi server started at host `kyuubi.host` with port `10009`, you can run the following cmd using `bin/kyuubi-ctl`: - + ```shell ./bin/kyuubi-ctl delete server --host "kyuubi.host" --port "10009" ``` - + Kyuubi server will stop until all session closed, and then you can start a new Kyuubi server. - If you use a random port for Kyuubi server - You can just start the new Kyuubi Server, then runing cmd using `bin/kyuubi-ctl`: + You can just start the new Kyuubi Server, and then run cmd using `bin/kyuubi-ctl`: ```shell ./bin/kyuubi-ctl delete server --host "kyuubi.host" --port "${PORT_FPR_OLD_KYUUBI_SERVER}" @@ -105,4 +101,6 @@ Kyuubi supports hot upgrade one of server in a HA cluster which is transparent t ```shell grep "server.KyuubiThriftBinaryFrontendService: Starting and exposing JDBC connection at" logs/kyuubi-*.out ``` + Note that, you do not need to care when the old Kyuubi server actually stopped since the new coming session are routed to the new Kyuubi server and others. + diff --git a/docs/deployment/hive_metastore.md b/docs/deployment/hive_metastore.md index d4592b75b84..f60465a1aad 100644 --- a/docs/deployment/hive_metastore.md +++ b/docs/deployment/hive_metastore.md @@ -1,20 +1,19 @@ - +- Licensed to the Apache Software Foundation (ASF) under one or more +- contributor license agreements. See the NOTICE file distributed with +- this work for additional information regarding copyright ownership. +- The ASF licenses this file to You under the Apache License, Version 2.0 +- (the "License"); you may not use this file except in compliance with +- the License. You may obtain a copy of the License at +- +- http://www.apache.org/licenses/LICENSE-2.0 +- +- Unless required by applicable law or agreed to in writing, software +- distributed under the License is distributed on an "AS IS" BASIS, +- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- See the License for the specific language governing permissions and +- limitations under the License. +--> # Integration with Hive Metastore @@ -31,7 +30,7 @@ In this section, you will learn how to configure Kyuubi to interact with Hive Me - A Spark binary distribution built with `-Phive` support - Use the built-in one in the Kyuubi distribution - Download from [Spark official website](https://spark.apache.org/downloads.html) - - Build from Spark source, [Building With Hive and JDBC Support](http://spark.apache.org/docs/latest/building-spark.html#building-with-hive-and-jdbc-support) + - Build from Spark source, [Building With Hive and JDBC Support](https://spark.apache.org/docs/latest/building-spark.html#building-with-hive-and-jdbc-support) - A copy of Hive client configuration So the whole thing here is to let Spark applications use this copy of Hive configuration to start a Hive metastore client for their own to talk to the Hive metastore server. @@ -90,6 +89,7 @@ Beeline version 2.3.7 by Apache Hive +-----------+------------+--------------+ No rows selected (0.04 seconds) ``` + Using this mode for experimental purposes only. In a real production environment, we always have a communal standalone metadata store, @@ -104,18 +104,18 @@ Use remote metastore database or server mode depends on the server-side configur ### Remote Metastore Database -Name | Value | Meaning ---- | --- | --- -javax.jdo.option.ConnectionURL | jdbc:mysql://<hostname>/<databaseName>?
createDatabaseIfNotExist=true | metadata is stored in a MySQL server -javax.jdo.option.ConnectionDriverName | com.mysql.jdbc.Driver | MySQL JDBC driver class -javax.jdo.option.ConnectionUserName | <username> | user name for connecting to MySQL server -javax.jdo.option.ConnectionPassword | <password> | password for connecting to MySQL server +| Name | Value | Meaning | +|---------------------------------------|--------------------------------------------------------------------------------------|------------------------------------------| +| javax.jdo.option.ConnectionURL | jdbc:mysql://<hostname>/<databaseName>?
createDatabaseIfNotExist=true | metadata is stored in a MySQL server | +| javax.jdo.option.ConnectionDriverName | com.mysql.jdbc.Driver | MySQL JDBC driver class | +| javax.jdo.option.ConnectionUserName | <username> | user name for connecting to MySQL server | +| javax.jdo.option.ConnectionPassword | <password> | password for connecting to MySQL server | ### Remote Metastore Server -Name | Value | Meaning ---- | --- | --- -hive.metastore.uris | thrift://<host>:<port>,thrift://<host1>:<port1> |
host and port for the Thrift metastore server.
+| Name | Value | Meaning | +|---------------------|-------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------| +| hive.metastore.uris | thrift://<host>:<port>,thrift://<host1>:<port1> |
host and port for the Thrift metastore server.
| ## Activate Configurations @@ -199,12 +199,13 @@ Caused by: org.apache.thrift.TApplicationException: Invalid method name: 'get_ta ... 93 more ``` -To prevent this problem, we can use Spark's [Interacting with Different Versions of Hive Metastore](http://spark.apache.org/docs/latest/sql-data-sources-hive-tables.html#interacting-with-different-versions-of-hive-metastore). +To prevent this problem, we can use Spark's [Interacting with Different Versions of Hive Metastore](https://spark.apache.org/docs/latest/sql-data-sources-hive-tables.html#interacting-with-different-versions-of-hive-metastore). ## Further Readings - Hive Wiki - [Hive Metastore Administration](https://cwiki.apache.org/confluence/display/Hive/AdminManual+Metastore+Administration) - Spark Online Documentation - - [Custom Hadoop/Hive Configuration](http://spark.apache.org/docs/latest/configuration.html#custom-hadoophive-configuration) - - [Hive Tables](http://spark.apache.org/docs/latest/sql-data-sources-hive-tables.html) + - [Custom Hadoop/Hive Configuration](https://spark.apache.org/docs/latest/configuration.html#custom-hadoophive-configuration) + - [Hive Tables](https://spark.apache.org/docs/latest/sql-data-sources-hive-tables.html) + diff --git a/docs/deployment/kyuubi_on_kubernetes.md b/docs/deployment/kyuubi_on_kubernetes.md index 03836629e7d..8bb1d88c3fe 100644 --- a/docs/deployment/kyuubi_on_kubernetes.md +++ b/docs/deployment/kyuubi_on_kubernetes.md @@ -1,20 +1,19 @@ - +- Licensed to the Apache Software Foundation (ASF) under one or more +- contributor license agreements. See the NOTICE file distributed with +- this work for additional information regarding copyright ownership. +- The ASF licenses this file to You under the Apache License, Version 2.0 +- (the "License"); you may not use this file except in compliance with +- the License. You may obtain a copy of the License at +- +- http://www.apache.org/licenses/LICENSE-2.0 +- +- Unless required by applicable law or agreed to in writing, software +- distributed under the License is distributed on an "AS IS" BASIS, +- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- See the License for the specific language governing permissions and +- limitations under the License. +--> # Deploy Kyuubi On Kubernetes @@ -28,7 +27,7 @@ If you want to deploy Kyuubi on Kubernetes, you'd better get a sense of the foll * [Kubectl](https://kubernetes.io/docs/reference/kubectl/overview/) * KubeConfig of the target cluster -## Kyuubi Official Docker Image +## Kyuubi Official Docker Image You can find the official docker image at [Apache Kyuubi Docker Hub](https://registry.hub.docker.com/r/apache/kyuubi). @@ -37,6 +36,7 @@ You can find the official docker image at [Apache Kyuubi Docker Hub](https://reg You can build custom Docker images from the `${KYUUBI_HOME}/bin/docker-image-tool.sh` contained in the binary package. Examples: + ```shell - Build and push image with tag "v1.4.0" to docker.io/myrepo $0 -r docker.io/myrepo -t v1.4.0 build @@ -102,18 +102,22 @@ If you want to know kyuubi engine on kubernetes configurations, you can refer to If you do not use Service or HostNetwork to get the IP address of the node where Kyuubi deployed. You should connect like: + ```shell kubectl exec -it kyuubi-example -- /bin/bash ${KYUUBI_HOME}/bin/beeline -u 'jdbc:hive2://localhost:10009' ``` Or you can submit tasks directly through local beeline: + ```shell ${KYUUBI_HOME}/bin/beeline -u 'jdbc:hive2://${hostname}:${port}' ``` + As using service nodePort, port means nodePort and hostname means any hostname of kubernetes node. As using HostNetwork, port means kyuubi containerPort and hostname means hostname of node where Kyuubi deployed. -## TODO +## TODO + Kyuubi will provide other connection methods in the future, like `Ingress`, `Load Balance`. diff --git a/docs/deployment/migration-guide.md b/docs/deployment/migration-guide.md index 86efd7a0cb5..fc916048c43 100644 --- a/docs/deployment/migration-guide.md +++ b/docs/deployment/migration-guide.md @@ -1,36 +1,43 @@ - +- Licensed to the Apache Software Foundation (ASF) under one or more +- contributor license agreements. See the NOTICE file distributed with +- this work for additional information regarding copyright ownership. +- The ASF licenses this file to You under the Apache License, Version 2.0 +- (the "License"); you may not use this file except in compliance with +- the License. You may obtain a copy of the License at +- +- http://www.apache.org/licenses/LICENSE-2.0 +- +- Unless required by applicable law or agreed to in writing, software +- distributed under the License is distributed on an "AS IS" BASIS, +- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- See the License for the specific language governing permissions and +- limitations under the License. +--> # Kyuubi Migration Guide -## Upgrading from Kyuubi 1.6 to 1.7 +## Upgrading from Kyuubi 1.7.0 to 1.7.1 + +* Since Kyuubi 1.7.1, `protocolVersion` is removed from the request parameters of the REST API `Open(create) a session`. All removed or unknown parameters will be silently ignored and affects nothing. +* Since Kyuubi 1.7.1, `confOverlay` is supported in the request parameters of the REST API `Create an operation with EXECUTE_STATEMENT type`. + +## Upgrading from Kyuubi 1.6 to 1.7 + * In Kyuubi 1.7, `kyuubi.ha.zookeeper.engine.auth.type` does not fallback to `kyuubi.ha.zookeeper.auth.type`. When Kyuubi engine does Kerberos authentication with Zookeeper, user needs to explicitly set `kyuubi.ha.zookeeper.engine.auth.type` to `KERBEROS`. * Since Kyuubi 1.7, Kyuubi returns engine's information for `GetInfo` request instead of server. To restore the previous behavior, set `kyuubi.server.info.provider` to `SERVER`. * Since Kyuubi 1.7, Kyuubi session type `SQL` is refactored to `INTERACTIVE`, because Kyuubi supports not only `SQL` session, but also `SCALA` and `PYTHON` sessions. User need to use `INTERACTIVE` sessionType to look up the session event. -* Since Kyuubi 1.7, the REST API of `Open(create) a session` will not contains parameters `user` `password` and `IpAddr`. User and password should be set in `Authorization` of http request if needed. +* Since Kyuubi 1.7, the REST API of `Open(create) a session` will not contain parameters `user` `password` and `IpAddr`. User and password should be set in `Authorization` of http request if needed. ## Upgrading from Kyuubi 1.6.0 to 1.6.1 + * Since Kyuubi 1.6.1, `kyuubi.ha.zookeeper.engine.auth.type` does not fallback to `kyuubi.ha.zookeeper.auth.type`. When Kyuubi engine does Kerberos authentication with Zookeeper, user needs to explicitly set `kyuubi.ha.zookeeper.engine.auth.type` to `KERBEROS`. ## Upgrading from Kyuubi 1.5 to 1.6 + * Kyuubi engine gets Zookeeper principal & keytab from `kyuubi.ha.zookeeper.auth.principal` & `kyuubi.ha.zookeeper.auth.keytab`. `kyuubi.ha.zookeeper.auth.principal` & `kyuubi.ha.zookeeper.auth.keytab` fallback to `kyuubi.kinit.principal` & `kyuubi.kinit.keytab` when not set. Since Kyuubi 1.6, `kyuubi.kinit.principal` & `kyuubi.kinit.keytab` are filtered out from Kyuubi engine's conf for better security. diff --git a/docs/deployment/settings.md b/docs/deployment/settings.md index 539ca823f6b..960f2c328e8 100644 --- a/docs/deployment/settings.md +++ b/docs/deployment/settings.md @@ -1,548 +1,455 @@ - - - +- Licensed to the Apache Software Foundation (ASF) under one or more +- contributor license agreements. See the NOTICE file distributed with +- this work for additional information regarding copyright ownership. +- The ASF licenses this file to You under the Apache License, Version 2.0 +- (the "License"); you may not use this file except in compliance with +- the License. You may obtain a copy of the License at +- +- http://www.apache.org/licenses/LICENSE-2.0 +- +- Unless required by applicable law or agreed to in writing, software +- distributed under the License is distributed on an "AS IS" BASIS, +- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- See the License for the specific language governing permissions and +- limitations under the License. +--> + # Introduction to the Kyuubi Configurations System Kyuubi provides several ways to configure the system and corresponding engines. - ## Environments - -You can configure the environment variables in `$KYUUBI_HOME/conf/kyuubi-env.sh`, e.g, `JAVA_HOME`, then this java runtime will be used both for Kyuubi server instance and the applications it launches. You can also change the variable in the subprocess's env configuration file, e.g.`$SPARK_HOME/conf/spark-env.sh` to use more specific ENV for SQL engine applications. -```bash -#!/usr/bin/env bash -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# -# - JAVA_HOME Java runtime to use. By default use "java" from PATH. -# -# -# - KYUUBI_CONF_DIR Directory containing the Kyuubi configurations to use. -# (Default: $KYUUBI_HOME/conf) -# - KYUUBI_LOG_DIR Directory for Kyuubi server-side logs. -# (Default: $KYUUBI_HOME/logs) -# - KYUUBI_PID_DIR Directory stores the Kyuubi instance pid file. -# (Default: $KYUUBI_HOME/pid) -# - KYUUBI_MAX_LOG_FILES Maximum number of Kyuubi server logs can rotate to. -# (Default: 5) -# - KYUUBI_JAVA_OPTS JVM options for the Kyuubi server itself in the form "-Dx=y". -# (Default: none). -# - KYUUBI_CTL_JAVA_OPTS JVM options for the Kyuubi ctl itself in the form "-Dx=y". -# (Default: none). -# - KYUUBI_BEELINE_OPTS JVM options for the Kyuubi BeeLine in the form "-Dx=Y". -# (Default: none) -# - KYUUBI_NICENESS The scheduling priority for Kyuubi server. -# (Default: 0) -# - KYUUBI_WORK_DIR_ROOT Root directory for launching sql engine applications. -# (Default: $KYUUBI_HOME/work) -# - HADOOP_CONF_DIR Directory containing the Hadoop / YARN configuration to use. -# - YARN_CONF_DIR Directory containing the YARN configuration to use. -# -# - SPARK_HOME Spark distribution which you would like to use in Kyuubi. -# - SPARK_CONF_DIR Optional directory where the Spark configuration lives. -# (Default: $SPARK_HOME/conf) -# - FLINK_HOME Flink distribution which you would like to use in Kyuubi. -# - FLINK_CONF_DIR Optional directory where the Flink configuration lives. -# (Default: $FLINK_HOME/conf) -# - FLINK_HADOOP_CLASSPATH Required Hadoop jars when you use the Kyuubi Flink engine. -# - HIVE_HOME Hive distribution which you would like to use in Kyuubi. -# - HIVE_CONF_DIR Optional directory where the Hive configuration lives. -# (Default: $HIVE_HOME/conf) -# - HIVE_HADOOP_CLASSPATH Required Hadoop jars when you use the Kyuubi Hive engine. -# - - -## Examples ## - -# export JAVA_HOME=/usr/jdk64/jdk1.8.0_152 -# export SPARK_HOME=/opt/spark -# export FLINK_HOME=/opt/flink -# export HIVE_HOME=/opt/hive -# export FLINK_HADOOP_CLASSPATH=/path/to/hadoop-client-runtime-3.3.2.jar:/path/to/hadoop-client-api-3.3.2.jar -# export HIVE_HADOOP_CLASSPATH=${HADOOP_HOME}/share/hadoop/common/lib/commons-collections-3.2.2.jar:${HADOOP_HOME}/share/hadoop/client/hadoop-client-runtime-3.1.0.jar:${HADOOP_HOME}/share/hadoop/client/hadoop-client-api-3.1.0.jar:${HADOOP_HOME}/share/hadoop/common/lib/htrace-core4-4.1.0-incubating.jar -# export HADOOP_CONF_DIR=/usr/ndp/current/mapreduce_client/conf -# export YARN_CONF_DIR=/usr/ndp/current/yarn/conf -# export KYUUBI_JAVA_OPTS="-Xmx10g -XX:+UnlockDiagnosticVMOptions -XX:ParGCCardsPerStrideChunk=4096 -XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:+CMSConcurrentMTEnabled -XX:CMSInitiatingOccupancyFraction=70 -XX:+UseCMSInitiatingOccupancyOnly -XX:+CMSClassUnloadingEnabled -XX:+CMSParallelRemarkEnabled -XX:+UseCondCardMark -XX:MaxDirectMemorySize=1024m -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=./logs -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -XX:+PrintTenuringDistribution -Xloggc:./logs/kyuubi-server-gc-%t.log -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=10 -XX:GCLogFileSize=5M -XX:NewRatio=3 -XX:MetaspaceSize=512m" -# export KYUUBI_BEELINE_OPTS="-Xmx2g -XX:+UnlockDiagnosticVMOptions -XX:ParGCCardsPerStrideChunk=4096 -XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:+CMSConcurrentMTEnabled -XX:CMSInitiatingOccupancyFraction=70 -XX:+UseCMSInitiatingOccupancyOnly -XX:+CMSClassUnloadingEnabled -XX:+CMSParallelRemarkEnabled -XX:+UseCondCardMark" -``` - -For the environment variables that only needed to be transferred into engine side, you can set it with a Kyuubi configuration item formatted `kyuubi.engineEnv.VAR_NAME`. For example, with `kyuubi.engineEnv.SPARK_DRIVER_MEMORY=4g`, the environment variable `SPARK_DRIVER_MEMORY` with value `4g` would be transferred into engine side. With `kyuubi.engineEnv.SPARK_CONF_DIR=/apache/confs/spark/conf`, the value of `SPARK_CONF_DIR` in engine side is set to `/apache/confs/spark/conf`. - -## Kyuubi Configurations - -You can configure the Kyuubi properties in `$KYUUBI_HOME/conf/kyuubi-defaults.conf`. For example: -```bash -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# +You can configure the environment variables in `$KYUUBI_HOME/conf/kyuubi-env.sh`, e.g, `JAVA_HOME`, then this java runtime will be used both for Kyuubi server instance and the applications it launches. You can also change the variable in the subprocess's env configuration file, e.g.`$SPARK_HOME/conf/spark-env.sh` to use more specific ENV for SQL engine applications. see `$KYUUBI_HOME/conf/kyuubi-env.sh.template` as an example. +For the environment variables that only needed to be transferred into engine side, you can set it with a Kyuubi configuration item formatted `kyuubi.engineEnv.VAR_NAME`. For example, with `kyuubi.engineEnv.SPARK_DRIVER_MEMORY=4g`, the environment variable `SPARK_DRIVER_MEMORY` with value `4g` would be transferred into engine side. With `kyuubi.engineEnv.SPARK_CONF_DIR=/apache/confs/spark/conf`, the value of `SPARK_CONF_DIR` on the engine side is set to `/apache/confs/spark/conf`. ## Kyuubi Configurations -# -# kyuubi.authentication NONE -# kyuubi.frontend.bind.host localhost -# kyuubi.frontend.bind.port 10009 -# - -# Details in https://kyuubi.apache.org/docs/latest/deployment/settings.html -``` +You can configure the Kyuubi properties in `$KYUUBI_HOME/conf/kyuubi-defaults.conf`, see `$KYUUBI_HOME/conf/kyuubi-defaults.conf.template` as an example. ### Authentication -Key | Default | Meaning | Type | Since ---- | --- | --- | --- | --- -kyuubi.authentication|NONE|A comma separated list of client authentication types.
  • NOSASL: raw transport.
  • NONE: no authentication check.
  • KERBEROS: Kerberos/GSSAPI authentication.
  • CUSTOM: User-defined authentication.
  • JDBC: JDBC query authentication.
  • LDAP: Lightweight Directory Access Protocol authentication.
Note that: For KERBEROS, it is SASL/GSSAPI mechanism, and for NONE, CUSTOM and LDAP, they are all SASL/PLAIN mechanism. If only NOSASL is specified, the authentication will be NOSASL. For SASL authentication, KERBEROS and PLAIN auth type are supported at the same time, and only the first specified PLAIN auth type is valid.|seq|1.0.0 -kyuubi.authentication.custom.class|<undefined>|User-defined authentication implementation of org.apache.kyuubi.service.authentication.PasswdAuthenticationProvider|string|1.3.0 -kyuubi.authentication.jdbc.driver.class|<undefined>|Driver class name for JDBC Authentication Provider.|string|1.6.0 -kyuubi.authentication.jdbc.password|<undefined>|Database password for JDBC Authentication Provider.|string|1.6.0 -kyuubi.authentication.jdbc.query|<undefined>|Query SQL template with placeholders for JDBC Authentication Provider to execute. Authentication passes if the result set is not empty.The SQL statement must start with the `SELECT` clause. Available placeholders are `${user}` and `${password}`.|string|1.6.0 -kyuubi.authentication.jdbc.url|<undefined>|JDBC URL for JDBC Authentication Provider.|string|1.6.0 -kyuubi.authentication.jdbc.user|<undefined>|Database user for JDBC Authentication Provider.|string|1.6.0 -kyuubi.authentication.ldap.base.dn|<undefined>|LDAP base DN.|string|1.0.0 -kyuubi.authentication.ldap.domain|<undefined>|LDAP domain.|string|1.0.0 -kyuubi.authentication.ldap.guidKey|uid|LDAP attribute name whose values are unique in this LDAP server.For example:uid or cn.|string|1.2.0 -kyuubi.authentication.ldap.url|<undefined>|SPACE character separated LDAP connection URL(s).|string|1.0.0 -kyuubi.authentication.sasl.qop|auth|Sasl QOP enable higher levels of protection for Kyuubi communication with clients.
  • auth - authentication only (default)
  • auth-int - authentication plus integrity protection
  • auth-conf - authentication plus integrity and confidentiality protection. This is applicable only if Kyuubi is configured to use Kerberos authentication.
|string|1.0.0 - +| Key | Default | Meaning | Type | Since | +|-----------------------------------------------|-------------------||--------|-------| +| kyuubi.authentication | NONE | A comma-separated list of client authentication types.
  • NOSASL: raw transport.
  • NONE: no authentication check.
  • KERBEROS: Kerberos/GSSAPI authentication.
  • CUSTOM: User-defined authentication.
  • JDBC: JDBC query authentication.
  • LDAP: Lightweight Directory Access Protocol authentication.
The following tree describes the catalog of each option.
  • NOSASL
  • SASL
    • SASL/PLAIN
      • NONE
      • LDAP
      • JDBC
      • CUSTOM
    • SASL/GSSAPI
      • KERBEROS
Note that: for SASL authentication, KERBEROS and PLAIN auth types are supported at the same time, and only the first specified PLAIN auth type is valid. | seq | 1.0.0 | +| kyuubi.authentication.custom.class | <undefined> | User-defined authentication implementation of org.apache.kyuubi.service.authentication.PasswdAuthenticationProvider | string | 1.3.0 | +| kyuubi.authentication.jdbc.driver.class | <undefined> | Driver class name for JDBC Authentication Provider. | string | 1.6.0 | +| kyuubi.authentication.jdbc.password | <undefined> | Database password for JDBC Authentication Provider. | string | 1.6.0 | +| kyuubi.authentication.jdbc.query | <undefined> | Query SQL template with placeholders for JDBC Authentication Provider to execute. Authentication passes if the result set is not empty.The SQL statement must start with the `SELECT` clause. Available placeholders are `${user}` and `${password}`. | string | 1.6.0 | +| kyuubi.authentication.jdbc.url | <undefined> | JDBC URL for JDBC Authentication Provider. | string | 1.6.0 | +| kyuubi.authentication.jdbc.user | <undefined> | Database user for JDBC Authentication Provider. | string | 1.6.0 | +| kyuubi.authentication.ldap.baseDN | <undefined> | LDAP base DN. | string | 1.7.0 | +| kyuubi.authentication.ldap.binddn | <undefined> | The user with which to bind to the LDAP server, and search for the full domain name of the user being authenticated. This should be the full domain name of the user, and should have search access across all users in the LDAP tree. If not specified, then the user being authenticated will be used as the bind user. For example: CN=bindUser,CN=Users,DC=subdomain,DC=domain,DC=com | string | 1.7.0 | +| kyuubi.authentication.ldap.bindpw | <undefined> | The password for the bind user, to be used to search for the full name of the user being authenticated. If the username is specified, this parameter must also be specified. | string | 1.7.0 | +| kyuubi.authentication.ldap.customLDAPQuery | <undefined> | A full LDAP query that LDAP Atn provider uses to execute against LDAP Server. If this query returns a null resultset, the LDAP Provider fails the Authentication request, succeeds if the user is part of the resultset.For example: `(&(objectClass=group)(objectClass=top)(instanceType=4)(cn=Domain*))`, `(&(objectClass=person)(|(sAMAccountName=admin)(|(memberOf=CN=Domain Admins,CN=Users,DC=domain,DC=com)(memberOf=CN=Administrators,CN=Builtin,DC=domain,DC=com))))` | string | 1.7.0 | +| kyuubi.authentication.ldap.domain | <undefined> | LDAP domain. | string | 1.0.0 | +| kyuubi.authentication.ldap.groupClassKey | groupOfNames | LDAP attribute name on the group entry that is to be used in LDAP group searches. For example: group, groupOfNames or groupOfUniqueNames. | string | 1.7.0 | +| kyuubi.authentication.ldap.groupDNPattern | <undefined> | COLON-separated list of patterns to use to find DNs for group entities in this directory. Use %s where the actual group name is to be substituted for. For example: CN=%s,CN=Groups,DC=subdomain,DC=domain,DC=com. | string | 1.7.0 | +| kyuubi.authentication.ldap.groupFilter || COMMA-separated list of LDAP Group names (short name not full DNs). For example: HiveAdmins,HadoopAdmins,Administrators | seq | 1.7.0 | +| kyuubi.authentication.ldap.groupMembershipKey | member | LDAP attribute name on the group object that contains the list of distinguished names for the user, group, and contact objects that are members of the group. For example: member, uniqueMember or memberUid | string | 1.7.0 | +| kyuubi.authentication.ldap.guidKey | uid | LDAP attribute name whose values are unique in this LDAP server. For example: uid or CN. | string | 1.2.0 | +| kyuubi.authentication.ldap.url | <undefined> | SPACE character separated LDAP connection URL(s). | string | 1.0.0 | +| kyuubi.authentication.ldap.userDNPattern | <undefined> | COLON-separated list of patterns to use to find DNs for users in this directory. Use %s where the actual group name is to be substituted for. For example: CN=%s,CN=Users,DC=subdomain,DC=domain,DC=com. | string | 1.7.0 | +| kyuubi.authentication.ldap.userFilter || COMMA-separated list of LDAP usernames (just short names, not full DNs). For example: hiveuser,impalauser,hiveadmin,hadoopadmin | seq | 1.7.0 | +| kyuubi.authentication.ldap.userMembershipKey | <undefined> | LDAP attribute name on the user object that contains groups of which the user is a direct member, except for the primary group, which is represented by the primaryGroupId. For example: memberOf | string | 1.7.0 | +| kyuubi.authentication.sasl.qop | auth | Sasl QOP enable higher levels of protection for Kyuubi communication with clients.
  • auth - authentication only (default)
  • auth-int - authentication plus integrity protection
  • auth-conf - authentication plus integrity and confidentiality protection. This is applicable only if Kyuubi is configured to use Kerberos authentication.
| string | 1.0.0 | ### Backend -Key | Default | Meaning | Type | Since ---- | --- | --- | --- | --- -kyuubi.backend.engine.exec.pool.keepalive.time|PT1M|Time(ms) that an idle async thread of the operation execution thread pool will wait for a new task to arrive before terminating in SQL engine applications|duration|1.0.0 -kyuubi.backend.engine.exec.pool.shutdown.timeout|PT10S|Timeout(ms) for the operation execution thread pool to terminate in SQL engine applications|duration|1.0.0 -kyuubi.backend.engine.exec.pool.size|100|Number of threads in the operation execution thread pool of SQL engine applications|int|1.0.0 -kyuubi.backend.engine.exec.pool.wait.queue.size|100|Size of the wait queue for the operation execution thread pool in SQL engine applications|int|1.0.0 -kyuubi.backend.server.event.json.log.path|file:///tmp/kyuubi/events|The location of server events go for the builtin JSON logger|string|1.4.0 -kyuubi.backend.server.event.loggers||A comma separated list of server history loggers, where session/operation etc events go.
  • JSON: the events will be written to the location of kyuubi.backend.server.event.json.log.path
  • JDBC: to be done
  • CUSTOM: User-defined event handlers.
Note that: Kyuubi supports custom event handlers with the Java SPI. To register a custom event handler, user need to implement a class which is a child of org.apache.kyuubi.events.handler.CustomEventHandlerProvider which has zero-arg constructor.|seq|1.4.0 -kyuubi.backend.server.exec.pool.keepalive.time|PT1M|Time(ms) that an idle async thread of the operation execution thread pool will wait for a new task to arrive before terminating in Kyuubi server|duration|1.0.0 -kyuubi.backend.server.exec.pool.shutdown.timeout|PT10S|Timeout(ms) for the operation execution thread pool to terminate in Kyuubi server|duration|1.0.0 -kyuubi.backend.server.exec.pool.size|100|Number of threads in the operation execution thread pool of Kyuubi server|int|1.0.0 -kyuubi.backend.server.exec.pool.wait.queue.size|100|Size of the wait queue for the operation execution thread pool of Kyuubi server|int|1.0.0 - +| Key | Default | Meaning | Type | Since | +|--------------------------------------------------|---------------------------||----------|-------| +| kyuubi.backend.engine.exec.pool.keepalive.time | PT1M | Time(ms) that an idle async thread of the operation execution thread pool will wait for a new task to arrive before terminating in SQL engine applications | duration | 1.0.0 | +| kyuubi.backend.engine.exec.pool.shutdown.timeout | PT10S | Timeout(ms) for the operation execution thread pool to terminate in SQL engine applications | duration | 1.0.0 | +| kyuubi.backend.engine.exec.pool.size | 100 | Number of threads in the operation execution thread pool of SQL engine applications | int | 1.0.0 | +| kyuubi.backend.engine.exec.pool.wait.queue.size | 100 | Size of the wait queue for the operation execution thread pool in SQL engine applications | int | 1.0.0 | +| kyuubi.backend.server.event.json.log.path | file:///tmp/kyuubi/events | The location of server events go for the built-in JSON logger | string | 1.4.0 | +| kyuubi.backend.server.event.loggers || A comma-separated list of server history loggers, where session/operation etc events go.
  • JSON: the events will be written to the location of kyuubi.backend.server.event.json.log.path
  • JDBC: to be done
  • CUSTOM: User-defined event handlers.
Note that: Kyuubi supports custom event handlers with the Java SPI. To register a custom event handler, the user needs to implement a class which is a child of org.apache.kyuubi.events.handler.CustomEventHandlerProvider which has a zero-arg constructor. | seq | 1.4.0 | +| kyuubi.backend.server.exec.pool.keepalive.time | PT1M | Time(ms) that an idle async thread of the operation execution thread pool will wait for a new task to arrive before terminating in Kyuubi server | duration | 1.0.0 | +| kyuubi.backend.server.exec.pool.shutdown.timeout | PT10S | Timeout(ms) for the operation execution thread pool to terminate in Kyuubi server | duration | 1.0.0 | +| kyuubi.backend.server.exec.pool.size | 100 | Number of threads in the operation execution thread pool of Kyuubi server | int | 1.0.0 | +| kyuubi.backend.server.exec.pool.wait.queue.size | 100 | Size of the wait queue for the operation execution thread pool of Kyuubi server | int | 1.0.0 | ### Batch -Key | Default | Meaning | Type | Since ---- | --- | --- | --- | --- -kyuubi.batch.application.check.interval|PT5S|The interval to check batch job application information.|duration|1.6.0 -kyuubi.batch.application.starvation.timeout|PT3M|Threshold above which to warn batch application may be starved.|duration|1.7.0 -kyuubi.batch.conf.ignore.list||A comma separated list of ignored keys for batch conf. If the batch conf contains any of them, the key and the corresponding value will be removed silently during batch job submission. Note that this rule is for server-side protection defined via administrators to prevent some essential configs from tampering. You can also pre-define some config for batch job submission with prefix: kyuubi.batchConf.[batchType]. For example, you can pre-define `spark.master` for spark batch job with key `kyuubi.batchConf.spark.spark.master`.|seq|1.6.0 -kyuubi.batch.session.idle.timeout|PT6H|Batch session idle timeout, it will be closed when it's not accessed for this duration|duration|1.6.2 - +| Key | Default | Meaning | Type | Since | +|---------------------------------------------|---------||----------|-------| +| kyuubi.batch.application.check.interval | PT5S | The interval to check batch job application information. | duration | 1.6.0 | +| kyuubi.batch.application.starvation.timeout | PT3M | Threshold above which to warn batch application may be starved. | duration | 1.7.0 | +| kyuubi.batch.conf.ignore.list || A comma-separated list of ignored keys for batch conf. If the batch conf contains any of them, the key and the corresponding value will be removed silently during batch job submission. Note that this rule is for server-side protection defined via administrators to prevent some essential configs from tampering. You can also pre-define some config for batch job submission with the prefix: kyuubi.batchConf.[batchType]. For example, you can pre-define `spark.master` for the Spark batch job with key `kyuubi.batchConf.spark.spark.master`. | seq | 1.6.0 | +| kyuubi.batch.session.idle.timeout | PT6H | Batch session idle timeout, it will be closed when it's not accessed for this duration | duration | 1.6.2 | ### Credentials -Key | Default | Meaning | Type | Since ---- | --- | --- | --- | --- -kyuubi.credentials.check.interval|PT5M|The interval to check the expiration of cached pairs.|duration|1.6.0 -kyuubi.credentials.hadoopfs.enabled|true|Whether to renew Hadoop filesystem delegation tokens|boolean|1.4.0 -kyuubi.credentials.hadoopfs.uris||Extra Hadoop filesystem URIs for which to request delegation tokens. The filesystem that hosts fs.defaultFS does not need to be listed here.|seq|1.4.0 -kyuubi.credentials.hive.enabled|true|Whether to renew Hive metastore delegation token|boolean|1.4.0 -kyuubi.credentials.idle.timeout|PT6H|inactive users' credentials will be expired after a configured timeout|duration|1.6.0 -kyuubi.credentials.renewal.interval|PT1H|How often Kyuubi renews one user's delegation tokens|duration|1.4.0 -kyuubi.credentials.renewal.retry.wait|PT1M|How long to wait before retrying to fetch new credentials after a failure.|duration|1.4.0 -kyuubi.credentials.update.wait.timeout|PT1M|How long to wait until credentials are ready.|duration|1.5.0 - +| Key | Default | Meaning | Type | Since | +|----------------------------------------|---------|----------------------------------------------------------------------------------------------------------------------------------------------|----------|-------| +| kyuubi.credentials.check.interval | PT5M | The interval to check the expiration of cached pairs. | duration | 1.6.0 | +| kyuubi.credentials.hadoopfs.enabled | true | Whether to renew Hadoop filesystem delegation tokens | boolean | 1.4.0 | +| kyuubi.credentials.hadoopfs.uris || Extra Hadoop filesystem URIs for which to request delegation tokens. The filesystem that hosts fs.defaultFS does not need to be listed here. | seq | 1.4.0 | +| kyuubi.credentials.hive.enabled | true | Whether to renew Hive metastore delegation token | boolean | 1.4.0 | +| kyuubi.credentials.idle.timeout | PT6H | The inactive users' credentials will be expired after a configured timeout | duration | 1.6.0 | +| kyuubi.credentials.renewal.interval | PT1H | How often Kyuubi renews one user's delegation tokens | duration | 1.4.0 | +| kyuubi.credentials.renewal.retry.wait | PT1M | How long to wait before retrying to fetch new credentials after a failure. | duration | 1.4.0 | +| kyuubi.credentials.update.wait.timeout | PT1M | How long to wait until the credentials are ready. | duration | 1.5.0 | ### Ctl -Key | Default | Meaning | Type | Since ---- | --- | --- | --- | --- -kyuubi.ctl.batch.log.on.failure.timeout|PT10S|The timeout for fetching remaining batch logs if the batch failed.|duration|1.6.1 -kyuubi.ctl.batch.log.query.interval|PT3S|The interval for fetching batch logs.|duration|1.6.0 -kyuubi.ctl.rest.auth.schema|basic|The authentication schema. Valid values are: basic, spnego.|string|1.6.0 -kyuubi.ctl.rest.base.url|<undefined>|The REST API base URL, which contains the scheme (http:// or https://), host name, port number|string|1.6.0 -kyuubi.ctl.rest.connect.timeout|PT30S|The timeout[ms] for establishing the connection with the kyuubi server.A timeout value of zero is interpreted as an infinite timeout.|duration|1.6.0 -kyuubi.ctl.rest.request.attempt.wait|PT3S|How long to wait between attempts of ctl rest request.|duration|1.6.0 -kyuubi.ctl.rest.request.max.attempts|3|The max attempts number for ctl rest request.|int|1.6.0 -kyuubi.ctl.rest.socket.timeout|PT2M|The timeout[ms] for waiting for data packets after connection is established.A timeout value of zero is interpreted as an infinite timeout.|duration|1.6.0 -kyuubi.ctl.rest.spnego.host|<undefined>|When auth schema is spnego, need to config spnego host.|string|1.6.0 - +| Key | Default | Meaning | Type | Since | +|-----------------------------------------|-------------------|----------------------------------------------------------------------------------------------------------------------------------------------|----------|-------| +| kyuubi.ctl.batch.log.on.failure.timeout | PT10S | The timeout for fetching remaining batch logs if the batch failed. | duration | 1.6.1 | +| kyuubi.ctl.batch.log.query.interval | PT3S | The interval for fetching batch logs. | duration | 1.6.0 | +| kyuubi.ctl.rest.auth.schema | basic | The authentication schema. Valid values are: basic, spnego. | string | 1.6.0 | +| kyuubi.ctl.rest.base.url | <undefined> | The REST API base URL, which contains the scheme (http:// or https://), hostname, port number | string | 1.6.0 | +| kyuubi.ctl.rest.connect.timeout | PT30S | The timeout[ms] for establishing the connection with the kyuubi server. A timeout value of zero is interpreted as an infinite timeout. | duration | 1.6.0 | +| kyuubi.ctl.rest.request.attempt.wait | PT3S | How long to wait between attempts of ctl rest request. | duration | 1.6.0 | +| kyuubi.ctl.rest.request.max.attempts | 3 | The max attempts number for ctl rest request. | int | 1.6.0 | +| kyuubi.ctl.rest.socket.timeout | PT2M | The timeout[ms] for waiting for data packets after connection is established. A timeout value of zero is interpreted as an infinite timeout. | duration | 1.6.0 | +| kyuubi.ctl.rest.spnego.host | <undefined> | When auth schema is spnego, need to config spnego host. | string | 1.6.0 | ### Delegation -Key | Default | Meaning | Type | Since ---- | --- | --- | --- | --- -kyuubi.delegation.key.update.interval|PT24H|unused yet|duration|1.0.0 -kyuubi.delegation.token.gc.interval|PT1H|unused yet|duration|1.0.0 -kyuubi.delegation.token.max.lifetime|PT168H|unused yet|duration|1.0.0 -kyuubi.delegation.token.renew.interval|PT168H|unused yet|duration|1.0.0 - +| Key | Default | Meaning | Type | Since | +|----------------------------------------|---------|------------|----------|-------| +| kyuubi.delegation.key.update.interval | PT24H | unused yet | duration | 1.0.0 | +| kyuubi.delegation.token.gc.interval | PT1H | unused yet | duration | 1.0.0 | +| kyuubi.delegation.token.max.lifetime | PT168H | unused yet | duration | 1.0.0 | +| kyuubi.delegation.token.renew.interval | PT168H | unused yet | duration | 1.0.0 | ### Engine -Key | Default | Meaning | Type | Since ---- | --- | --- | --- | --- -kyuubi.engine.connection.url.use.hostname|true|(deprecated) When true, engine register with hostname to zookeeper. When spark run on k8s with cluster mode, set to false to ensure that server can connect to engine|boolean|1.3.0 -kyuubi.engine.deregister.exception.classes||A comma separated list of exception classes. If there is any exception thrown, whose class matches the specified classes, the engine would deregister itself.|seq|1.2.0 -kyuubi.engine.deregister.exception.messages||A comma separated list of exception messages. If there is any exception thrown, whose message or stacktrace matches the specified message list, the engine would deregister itself.|seq|1.2.0 -kyuubi.engine.deregister.exception.ttl|PT30M|Time to live(TTL) for exceptions pattern specified in kyuubi.engine.deregister.exception.classes and kyuubi.engine.deregister.exception.messages to deregister engines. Once the total error count hits the kyuubi.engine.deregister.job.max.failures within the TTL, an engine will deregister itself and wait for self-terminated. Otherwise, we suppose that the engine has recovered from temporary failures.|duration|1.2.0 -kyuubi.engine.deregister.job.max.failures|4|Number of failures of job before deregistering the engine.|int|1.2.0 -kyuubi.engine.event.json.log.path|file:///tmp/kyuubi/events|The location of all the engine events go for the builtin JSON logger.
  • Local Path: start with 'file://'
  • HDFS Path: start with 'hdfs://'
|string|1.3.0 -kyuubi.engine.event.loggers|SPARK|A comma separated list of engine history loggers, where engine/session/operation etc events go.
  • SPARK: the events will be written to the spark listener bus.
  • JSON: the events will be written to the location of kyuubi.engine.event.json.log.path
  • JDBC: to be done
  • CUSTOM: User-defined event handlers.
Note that: Kyuubi supports custom event handlers with the Java SPI. To register a custom event handler, user need to implement a class which is a child of org.apache.kyuubi.events.handler.CustomEventHandlerProvider which has zero-arg constructor.|seq|1.3.0 -kyuubi.engine.flink.extra.classpath|<undefined>|The extra classpath for the flink sql engine, for configuring location of hadoop client jars, etc|string|1.6.0 -kyuubi.engine.flink.java.options|<undefined>|The extra java options for the flink sql engine|string|1.6.0 -kyuubi.engine.flink.memory|1g|The heap memory for the flink sql engine|string|1.6.0 -kyuubi.engine.hive.event.loggers|JSON|A comma separated list of engine history loggers, where engine/session/operation etc events go.
  • JSON: the events will be written to the location of kyuubi.engine.event.json.log.path
  • JDBC: to be done
  • CUSTOM: to be done.
|seq|1.7.0 -kyuubi.engine.hive.extra.classpath|<undefined>|The extra classpath for the hive query engine, for configuring location of hadoop client jars, etc|string|1.6.0 -kyuubi.engine.hive.java.options|<undefined>|The extra java options for the hive query engine|string|1.6.0 -kyuubi.engine.hive.memory|1g|The heap memory for the hive query engine|string|1.6.0 -kyuubi.engine.initialize.sql|SHOW DATABASES|SemiColon-separated list of SQL statements to be initialized in the newly created engine before queries. i.e. use `SHOW DATABASES` to eagerly active HiveClient. This configuration can not be used in JDBC url due to the limitation of Beeline/JDBC driver.|seq|1.2.0 -kyuubi.engine.jdbc.connection.password|<undefined>|The password is used for connecting to server|string|1.6.0 -kyuubi.engine.jdbc.connection.properties||The additional properties are used for connecting to server|seq|1.6.0 -kyuubi.engine.jdbc.connection.provider|<undefined>|The connection provider is used for getting a connection from server|string|1.6.0 -kyuubi.engine.jdbc.connection.url|<undefined>|The server url that engine will connect to|string|1.6.0 -kyuubi.engine.jdbc.connection.user|<undefined>|The user is used for connecting to server|string|1.6.0 -kyuubi.engine.jdbc.driver.class|<undefined>|The driver class for jdbc engine connection|string|1.6.0 -kyuubi.engine.jdbc.extra.classpath|<undefined>|The extra classpath for the jdbc query engine, for configuring location of jdbc driver, etc|string|1.6.0 -kyuubi.engine.jdbc.java.options|<undefined>|The extra java options for the jdbc query engine|string|1.6.0 -kyuubi.engine.jdbc.memory|1g|The heap memory for the jdbc query engine|string|1.6.0 -kyuubi.engine.jdbc.type|<undefined>|The short name of jdbc type|string|1.6.0 -kyuubi.engine.operation.convert.catalog.database.enabled|true|When set to true, The engine converts the JDBC methods of set/get Catalog and set/get Schema to the implementation of different engines|boolean|1.6.0 -kyuubi.engine.operation.log.dir.root|engine_operation_logs|Root directory for query operation log at engine-side.|string|1.4.0 -kyuubi.engine.pool.name|engine-pool|The name of engine pool.|string|1.5.0 -kyuubi.engine.pool.selectPolicy|RANDOM|The select policy of an engine from the corresponding engine pool engine for a session.
  • RANDOM - Randomly use the engine in the pool
  • POLLING - Polling use the engine in the pool
|string|1.7.0 -kyuubi.engine.pool.size|-1|The size of engine pool. Note that, if the size is less than 1, the engine pool will not be enabled; otherwise, the size of the engine pool will be min(this, kyuubi.engine.pool.size.threshold).|int|1.4.0 -kyuubi.engine.pool.size.threshold|9|This parameter is introduced as a server-side parameter, and controls the upper limit of the engine pool.|int|1.4.0 -kyuubi.engine.session.initialize.sql||SemiColon-separated list of SQL statements to be initialized in the newly created engine session before queries. This configuration can not be used in JDBC url due to the limitation of Beeline/JDBC driver.|seq|1.3.0 -kyuubi.engine.share.level|USER|Engines will be shared in different levels, available configs are:
  • CONNECTION: engine will not be shared but only used by the current client connection
  • USER: engine will be shared by all sessions created by a unique username, see also kyuubi.engine.share.level.subdomain
  • GROUP: engine will be shared by all sessions created by all users belong to the same primary group name. The engine will be launched by the group name as the effective username, so here the group name is kind of special user who is able to visit the compute resources/data of a team. It follows the [Hadoop GroupsMapping](https://reurl.cc/xE61Y5) to map user to a primary group. If the primary group is not found, it fallback to the USER level.
  • SERVER: the App will be shared by Kyuubi servers
|string|1.2.0 -kyuubi.engine.share.level.sub.domain|<undefined>|(deprecated) - Using kyuubi.engine.share.level.subdomain instead|string|1.2.0 -kyuubi.engine.share.level.subdomain|<undefined>|Allow end-users to create a subdomain for the share level of an engine. A subdomain is a case-insensitive string values that must be a valid zookeeper sub path. For example, for `USER` share level, an end-user can share a certain engine within a subdomain, not for all of its clients. End-users are free to create multiple engines in the `USER` share level. When disable engine pool, use 'default' if absent.|string|1.4.0 -kyuubi.engine.single.spark.session|false|When set to true, this engine is running in a single session mode. All the JDBC/ODBC connections share the temporary views, function registries, SQL configuration and the current database.|boolean|1.3.0 -kyuubi.engine.spark.event.loggers|SPARK|A comma separated list of engine loggers, where engine/session/operation etc events go.
  • SPARK: the events will be written to the spark listener bus.
  • JSON: the events will be written to the location of kyuubi.engine.event.json.log.path
  • JDBC: to be done
  • CUSTOM: to be done.
|seq|1.7.0 -kyuubi.engine.spark.python.env.archive|<undefined>|Portable python env archive used for Spark engine python language mode.|string|1.7.0 -kyuubi.engine.spark.python.env.archive.exec.path|bin/python|The python exec path under the python env archive.|string|1.7.0 -kyuubi.engine.spark.python.home.archive|<undefined>|Spark archive containing $SPARK_HOME/python directory, which is used to init session python worker for python language mode.|string|1.7.0 -kyuubi.engine.trino.event.loggers|JSON|A comma separated list of engine history loggers, where engine/session/operation etc events go.
  • JSON: the events will be written to the location of kyuubi.engine.event.json.log.path
  • JDBC: to be done
  • CUSTOM: to be done.
|seq|1.7.0 -kyuubi.engine.trino.extra.classpath|<undefined>|The extra classpath for the trino query engine, for configuring other libs which may need by the trino engine |string|1.6.0 -kyuubi.engine.trino.java.options|<undefined>|The extra java options for the trino query engine|string|1.6.0 -kyuubi.engine.trino.memory|1g|The heap memory for the trino query engine|string|1.6.0 -kyuubi.engine.type|SPARK_SQL|Specify the detailed engine that supported by the Kyuubi. The engine type bindings to SESSION scope. This configuration is experimental. Currently, available configs are:
  • SPARK_SQL: specify this engine type will launch a Spark engine which can provide all the capacity of the Apache Spark. Note, it's a default engine type.
  • FLINK_SQL: specify this engine type will launch a Flink engine which can provide all the capacity of the Apache Flink.
  • TRINO: specify this engine type will launch a Trino engine which can provide all the capacity of the Trino.
  • HIVE_SQL: specify this engine type will launch a Hive engine which can provide all the capacity of the Hive Server2.
  • JDBC: specify this engine type will launch a JDBC engine which can provide a mysql protocol connector, for now we only support Doris dialect.
|string|1.4.0 -kyuubi.engine.ui.retainedSessions|200|The number of SQL client sessions kept in the Kyuubi Query Engine web UI.|int|1.4.0 -kyuubi.engine.ui.retainedStatements|200|The number of statements kept in the Kyuubi Query Engine web UI.|int|1.4.0 -kyuubi.engine.ui.stop.enabled|true|When true, allows Kyuubi engine to be killed from the Spark Web UI.|boolean|1.3.0 -kyuubi.engine.user.isolated.spark.session|true|When set to false, if the engine is running in a group or server share level, all the JDBC/ODBC connections will be isolated against the user. Including: the temporary views, function registries, SQL configuration and the current database. Note that, it does not affect if the share level is connection or user.|boolean|1.6.0 -kyuubi.engine.user.isolated.spark.session.idle.interval|PT1M|The interval to check if the user isolated spark session is timeout.|duration|1.6.0 -kyuubi.engine.user.isolated.spark.session.idle.timeout|PT6H|If kyuubi.engine.user.isolated.spark.session is false, we will release the spark session if its corresponding user is inactive after this configured timeout.|duration|1.6.0 - +| Key | Default | Meaning | Type | Since | +|----------------------------------------------------------|---------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------|-------| +| kyuubi.engine.chat.extra.classpath | <undefined> | The extra classpath for the Chat engine, for configuring the location of the SDK and etc. | string | 1.8.0 | +| kyuubi.engine.chat.gpt.apiKey | <undefined> | The key to access OpenAI open API, which could be got at https://platform.openai.com/account/api-keys | string | 1.8.0 | +| kyuubi.engine.chat.gpt.http.connect.timeout | PT2M | The timeout[ms] for establishing the connection with the Chat GPT server. A timeout value of zero is interpreted as an infinite timeout. | duration | 1.8.0 | +| kyuubi.engine.chat.gpt.http.proxy | <undefined> | HTTP proxy url for API calling in Chat GPT engine. e.g. http://127.0.0.1:1087 | string | 1.8.0 | +| kyuubi.engine.chat.gpt.http.socket.timeout | PT2M | The timeout[ms] for waiting for data packets after Chat GPT server connection is established. A timeout value of zero is interpreted as an infinite timeout. | duration | 1.8.0 | +| kyuubi.engine.chat.gpt.model | gpt-3.5-turbo | ID of the model used in ChatGPT. Available models refer to OpenAI's [Model overview](https://platform.openai.com/docs/models/overview). | string | 1.8.0 | +| kyuubi.engine.chat.java.options | <undefined> | The extra Java options for the Chat engine | string | 1.8.0 | +| kyuubi.engine.chat.memory | 1g | The heap memory for the Chat engine | string | 1.8.0 | +| kyuubi.engine.chat.provider | ECHO | The provider for the Chat engine. Candidates:
  • ECHO: simply replies a welcome message.
  • GPT: a.k.a ChatGPT, powered by OpenAI.
| string | 1.8.0 | +| kyuubi.engine.connection.url.use.hostname | true | (deprecated) When true, the engine registers with hostname to zookeeper. When Spark runs on K8s with cluster mode, set to false to ensure that server can connect to engine | boolean | 1.3.0 | +| kyuubi.engine.deregister.exception.classes || A comma-separated list of exception classes. If there is any exception thrown, whose class matches the specified classes, the engine would deregister itself. | seq | 1.2.0 | +| kyuubi.engine.deregister.exception.messages || A comma-separated list of exception messages. If there is any exception thrown, whose message or stacktrace matches the specified message list, the engine would deregister itself. | seq | 1.2.0 | +| kyuubi.engine.deregister.exception.ttl | PT30M | Time to live(TTL) for exceptions pattern specified in kyuubi.engine.deregister.exception.classes and kyuubi.engine.deregister.exception.messages to deregister engines. Once the total error count hits the kyuubi.engine.deregister.job.max.failures within the TTL, an engine will deregister itself and wait for self-terminated. Otherwise, we suppose that the engine has recovered from temporary failures. | duration | 1.2.0 | +| kyuubi.engine.deregister.job.max.failures | 4 | Number of failures of job before deregistering the engine. | int | 1.2.0 | +| kyuubi.engine.event.json.log.path | file:///tmp/kyuubi/events | The location where all the engine events go for the built-in JSON logger.
  • Local Path: start with 'file://'
  • HDFS Path: start with 'hdfs://'
| string | 1.3.0 | +| kyuubi.engine.event.loggers | SPARK | A comma-separated list of engine history loggers, where engine/session/operation etc events go.
  • SPARK: the events will be written to the Spark listener bus.
  • JSON: the events will be written to the location of kyuubi.engine.event.json.log.path
  • JDBC: to be done
  • CUSTOM: User-defined event handlers.
Note that: Kyuubi supports custom event handlers with the Java SPI. To register a custom event handler, the user needs to implement a subclass of `org.apache.kyuubi.events.handler.CustomEventHandlerProvider` which has a zero-arg constructor. | seq | 1.3.0 | +| kyuubi.engine.flink.extra.classpath | <undefined> | The extra classpath for the Flink SQL engine, for configuring the location of hadoop client jars, etc | string | 1.6.0 | +| kyuubi.engine.flink.java.options | <undefined> | The extra Java options for the Flink SQL engine | string | 1.6.0 | +| kyuubi.engine.flink.memory | 1g | The heap memory for the Flink SQL engine | string | 1.6.0 | +| kyuubi.engine.hive.event.loggers | JSON | A comma-separated list of engine history loggers, where engine/session/operation etc events go.
  • JSON: the events will be written to the location of kyuubi.engine.event.json.log.path
  • JDBC: to be done
  • CUSTOM: to be done.
| seq | 1.7.0 | +| kyuubi.engine.hive.extra.classpath | <undefined> | The extra classpath for the Hive query engine, for configuring location of the hadoop client jars and etc. | string | 1.6.0 | +| kyuubi.engine.hive.java.options | <undefined> | The extra Java options for the Hive query engine | string | 1.6.0 | +| kyuubi.engine.hive.memory | 1g | The heap memory for the Hive query engine | string | 1.6.0 | +| kyuubi.engine.initialize.sql | SHOW DATABASES | SemiColon-separated list of SQL statements to be initialized in the newly created engine before queries. i.e. use `SHOW DATABASES` to eagerly active HiveClient. This configuration can not be used in JDBC url due to the limitation of Beeline/JDBC driver. | seq | 1.2.0 | +| kyuubi.engine.jdbc.connection.password | <undefined> | The password is used for connecting to server | string | 1.6.0 | +| kyuubi.engine.jdbc.connection.properties || The additional properties are used for connecting to server | seq | 1.6.0 | +| kyuubi.engine.jdbc.connection.provider | <undefined> | The connection provider is used for getting a connection from the server | string | 1.6.0 | +| kyuubi.engine.jdbc.connection.url | <undefined> | The server url that engine will connect to | string | 1.6.0 | +| kyuubi.engine.jdbc.connection.user | <undefined> | The user is used for connecting to server | string | 1.6.0 | +| kyuubi.engine.jdbc.driver.class | <undefined> | The driver class for JDBC engine connection | string | 1.6.0 | +| kyuubi.engine.jdbc.extra.classpath | <undefined> | The extra classpath for the JDBC query engine, for configuring the location of the JDBC driver and etc. | string | 1.6.0 | +| kyuubi.engine.jdbc.java.options | <undefined> | The extra Java options for the JDBC query engine | string | 1.6.0 | +| kyuubi.engine.jdbc.memory | 1g | The heap memory for the JDBC query engine | string | 1.6.0 | +| kyuubi.engine.jdbc.type | <undefined> | The short name of JDBC type | string | 1.6.0 | +| kyuubi.engine.operation.convert.catalog.database.enabled | true | When set to true, The engine converts the JDBC methods of set/get Catalog and set/get Schema to the implementation of different engines | boolean | 1.6.0 | +| kyuubi.engine.operation.log.dir.root | engine_operation_logs | Root directory for query operation log at engine-side. | string | 1.4.0 | +| kyuubi.engine.pool.name | engine-pool | The name of the engine pool. | string | 1.5.0 | +| kyuubi.engine.pool.selectPolicy | RANDOM | The select policy of an engine from the corresponding engine pool engine for a session.
  • RANDOM - Randomly use the engine in the pool
  • POLLING - Polling use the engine in the pool
| string | 1.7.0 | +| kyuubi.engine.pool.size | -1 | The size of the engine pool. Note that, if the size is less than 1, the engine pool will not be enabled; otherwise, the size of the engine pool will be min(this, kyuubi.engine.pool.size.threshold). | int | 1.4.0 | +| kyuubi.engine.pool.size.threshold | 9 | This parameter is introduced as a server-side parameter controlling the upper limit of the engine pool. | int | 1.4.0 | +| kyuubi.engine.session.initialize.sql || SemiColon-separated list of SQL statements to be initialized in the newly created engine session before queries. This configuration can not be used in JDBC url due to the limitation of Beeline/JDBC driver. | seq | 1.3.0 | +| kyuubi.engine.share.level | USER | Engines will be shared in different levels, available configs are:
  • CONNECTION: engine will not be shared but only used by the current client connection
  • USER: engine will be shared by all sessions created by a unique username, see also kyuubi.engine.share.level.subdomain
  • GROUP: the engine will be shared by all sessions created by all users belong to the same primary group name. The engine will be launched by the group name as the effective username, so here the group name is in value of special user who is able to visit the computing resources/data of the team. It follows the [Hadoop GroupsMapping](https://reurl.cc/xE61Y5) to map user to a primary group. If the primary group is not found, it fallback to the USER level.
  • SERVER: the App will be shared by Kyuubi servers
| string | 1.2.0 | +| kyuubi.engine.share.level.sub.domain | <undefined> | (deprecated) - Using kyuubi.engine.share.level.subdomain instead | string | 1.2.0 | +| kyuubi.engine.share.level.subdomain | <undefined> | Allow end-users to create a subdomain for the share level of an engine. A subdomain is a case-insensitive string values that must be a valid zookeeper subpath. For example, for the `USER` share level, an end-user can share a certain engine within a subdomain, not for all of its clients. End-users are free to create multiple engines in the `USER` share level. When disable engine pool, use 'default' if absent. | string | 1.4.0 | +| kyuubi.engine.single.spark.session | false | When set to true, this engine is running in a single session mode. All the JDBC/ODBC connections share the temporary views, function registries, SQL configuration and the current database. | boolean | 1.3.0 | +| kyuubi.engine.spark.event.loggers | SPARK | A comma-separated list of engine loggers, where engine/session/operation etc events go.
  • SPARK: the events will be written to the Spark listener bus.
  • JSON: the events will be written to the location of kyuubi.engine.event.json.log.path
  • JDBC: to be done
  • CUSTOM: to be done.
| seq | 1.7.0 | +| kyuubi.engine.spark.python.env.archive | <undefined> | Portable Python env archive used for Spark engine Python language mode. | string | 1.7.0 | +| kyuubi.engine.spark.python.env.archive.exec.path | bin/python | The Python exec path under the Python env archive. | string | 1.7.0 | +| kyuubi.engine.spark.python.home.archive | <undefined> | Spark archive containing $SPARK_HOME/python directory, which is used to init session Python worker for Python language mode. | string | 1.7.0 | +| kyuubi.engine.submit.timeout | PT30S | Period to tolerant Driver Pod ephemerally invisible after submitting. In some Resource Managers, e.g. K8s, the Driver Pod is not visible immediately after `spark-submit` is returned. | duration | 1.7.1 | +| kyuubi.engine.trino.event.loggers | JSON | A comma-separated list of engine history loggers, where engine/session/operation etc events go.
  • JSON: the events will be written to the location of kyuubi.engine.event.json.log.path
  • JDBC: to be done
  • CUSTOM: to be done.
| seq | 1.7.0 | +| kyuubi.engine.trino.extra.classpath | <undefined> | The extra classpath for the Trino query engine, for configuring other libs which may need by the Trino engine | string | 1.6.0 | +| kyuubi.engine.trino.java.options | <undefined> | The extra Java options for the Trino query engine | string | 1.6.0 | +| kyuubi.engine.trino.memory | 1g | The heap memory for the Trino query engine | string | 1.6.0 | +| kyuubi.engine.type | SPARK_SQL | Specify the detailed engine supported by Kyuubi. The engine type bindings to SESSION scope. This configuration is experimental. Currently, available configs are:
  • SPARK_SQL: specify this engine type will launch a Spark engine which can provide all the capacity of the Apache Spark. Note, it's a default engine type.
  • FLINK_SQL: specify this engine type will launch a Flink engine which can provide all the capacity of the Apache Flink.
  • TRINO: specify this engine type will launch a Trino engine which can provide all the capacity of the Trino.
  • HIVE_SQL: specify this engine type will launch a Hive engine which can provide all the capacity of the Hive Server2.
  • JDBC: specify this engine type will launch a JDBC engine which can provide a MySQL protocol connector, for now we only support Doris dialect.
  • CHAT: specify this engine type will launch a Chat engine.
| string | 1.4.0 | +| kyuubi.engine.ui.retainedSessions | 200 | The number of SQL client sessions kept in the Kyuubi Query Engine web UI. | int | 1.4.0 | +| kyuubi.engine.ui.retainedStatements | 200 | The number of statements kept in the Kyuubi Query Engine web UI. | int | 1.4.0 | +| kyuubi.engine.ui.stop.enabled | true | When true, allows Kyuubi engine to be killed from the Spark Web UI. | boolean | 1.3.0 | +| kyuubi.engine.user.isolated.spark.session | true | When set to false, if the engine is running in a group or server share level, all the JDBC/ODBC connections will be isolated against the user. Including the temporary views, function registries, SQL configuration, and the current database. Note that, it does not affect if the share level is connection or user. | boolean | 1.6.0 | +| kyuubi.engine.user.isolated.spark.session.idle.interval | PT1M | The interval to check if the user-isolated Spark session is timeout. | duration | 1.6.0 | +| kyuubi.engine.user.isolated.spark.session.idle.timeout | PT6H | If kyuubi.engine.user.isolated.spark.session is false, we will release the Spark session if its corresponding user is inactive after this configured timeout. | duration | 1.6.0 | ### Event -Key | Default | Meaning | Type | Since ---- | --- | --- | --- | --- -kyuubi.event.async.pool.keepalive.time|PT1M|Time(ms) that an idle async thread of the async event handler thread pool will wait for a new task to arrive before terminating|duration|1.7.0 -kyuubi.event.async.pool.size|8|Number of threads in the async event handler thread pool|int|1.7.0 -kyuubi.event.async.pool.wait.queue.size|100|Size of the wait queue for the async event handler thread pool|int|1.7.0 - +| Key | Default | Meaning | Type | Since | +|-----------------------------------------|---------|---------------------------------------------------------------------------------------------------------------------------------|----------|-------| +| kyuubi.event.async.pool.keepalive.time | PT1M | Time(ms) that an idle async thread of the async event handler thread pool will wait for a new task to arrive before terminating | duration | 1.7.0 | +| kyuubi.event.async.pool.size | 8 | Number of threads in the async event handler thread pool | int | 1.7.0 | +| kyuubi.event.async.pool.wait.queue.size | 100 | Size of the wait queue for the async event handler thread pool | int | 1.7.0 | ### Frontend -Key | Default | Meaning | Type | Since ---- | --- | --- | --- | --- -kyuubi.frontend.backoff.slot.length|PT0.1S|(deprecated) Time to back off during login to the thrift frontend service.|duration|1.0.0 -kyuubi.frontend.bind.host|<undefined>|Hostname or IP of the machine on which to run the frontend services.|string|1.0.0 -kyuubi.frontend.bind.port|10009|(deprecated) Port of the machine on which to run the thrift frontend service via binary protocol.|int|1.0.0 -kyuubi.frontend.connection.url.use.hostname|true|When true, frontend services prefer hostname, otherwise, ip address. Note that, the default value is set to `false` when engine running on Kubernetes to prevent potential network issue.|boolean|1.5.0 -kyuubi.frontend.login.timeout|PT20S|(deprecated) Timeout for Thrift clients during login to the thrift frontend service.|duration|1.0.0 -kyuubi.frontend.max.message.size|104857600|(deprecated) Maximum message size in bytes a Kyuubi server will accept.|int|1.0.0 -kyuubi.frontend.max.worker.threads|999|(deprecated) Maximum number of threads in the of frontend worker thread pool for the thrift frontend service|int|1.0.0 -kyuubi.frontend.min.worker.threads|9|(deprecated) Minimum number of threads in the of frontend worker thread pool for the thrift frontend service|int|1.0.0 -kyuubi.frontend.mysql.bind.host|<undefined>|Hostname or IP of the machine on which to run the MySQL frontend service.|string|1.4.0 -kyuubi.frontend.mysql.bind.port|3309|Port of the machine on which to run the MySQL frontend service.|int|1.4.0 -kyuubi.frontend.mysql.max.worker.threads|999|Maximum number of threads in the command execution thread pool for the MySQL frontend service|int|1.4.0 -kyuubi.frontend.mysql.min.worker.threads|9|Minimum number of threads in the command execution thread pool for the MySQL frontend service|int|1.4.0 -kyuubi.frontend.mysql.netty.worker.threads|<undefined>|Number of thread in the netty worker event loop of MySQL frontend service. Use min(cpu_cores, 8) in default.|int|1.4.0 -kyuubi.frontend.mysql.worker.keepalive.time|PT1M|Time(ms) that an idle async thread of the command execution thread pool will wait for a new task to arrive before terminating in MySQL frontend service|duration|1.4.0 -kyuubi.frontend.protocols|THRIFT_BINARY|A comma separated list for all frontend protocols
  • THRIFT_BINARY - HiveServer2 compatible thrift binary protocol.
  • THRIFT_HTTP - HiveServer2 compatible thrift http protocol.
  • REST - Kyuubi defined REST API(experimental).
  • MYSQL - MySQL compatible text protocol(experimental).
  • TRINO - Trino compatible http protocol(experimental).
|seq|1.4.0 -kyuubi.frontend.proxy.http.client.ip.header|X-Real-IP|The http header to record the real client ip address. If your server is behind a load balancer or other proxy, the server will see this load balancer or proxy IP address as the client IP address, to get around this common issue, most load balancers or proxies offer the ability to record the real remote IP address in an HTTP header that will be added to the request for other devices to use. Note that, because the header value can be specified to any ip address, so it will not be used for authentication.|string|1.6.0 -kyuubi.frontend.rest.bind.host|<undefined>|Hostname or IP of the machine on which to run the REST frontend service.|string|1.4.0 -kyuubi.frontend.rest.bind.port|10099|Port of the machine on which to run the REST frontend service.|int|1.4.0 -kyuubi.frontend.rest.max.worker.threads|999|Maximum number of threads in the of frontend worker thread pool for the rest frontend service|int|1.6.2 -kyuubi.frontend.ssl.keystore.algorithm|<undefined>|SSL certificate keystore algorithm.|string|1.7.0 -kyuubi.frontend.ssl.keystore.password|<undefined>|SSL certificate keystore password.|string|1.7.0 -kyuubi.frontend.ssl.keystore.path|<undefined>|SSL certificate keystore location.|string|1.7.0 -kyuubi.frontend.ssl.keystore.type|<undefined>|SSL certificate keystore type.|string|1.7.0 -kyuubi.frontend.thrift.backoff.slot.length|PT0.1S|Time to back off during login to the thrift frontend service.|duration|1.4.0 -kyuubi.frontend.thrift.binary.bind.host|<undefined>|Hostname or IP of the machine on which to run the thrift frontend service via binary protocol.|string|1.4.0 -kyuubi.frontend.thrift.binary.bind.port|10009|Port of the machine on which to run the thrift frontend service via binary protocol.|int|1.4.0 -kyuubi.frontend.thrift.binary.ssl.disallowed.protocols|SSLv2,SSLv3|SSL versions to disallow for Kyuubi thrift binary frontend.|seq|1.7.0 -kyuubi.frontend.thrift.binary.ssl.enabled|false|Set this to true for using SSL encryption in thrift binary frontend server.|boolean|1.7.0 -kyuubi.frontend.thrift.binary.ssl.include.ciphersuites||A comma separated list of include SSL cipher suite names for thrift binary frontend.|seq|1.7.0 -kyuubi.frontend.thrift.http.allow.user.substitution|true|Allow alternate user to be specified as part of open connection request when using HTTP transport mode.|boolean|1.6.0 -kyuubi.frontend.thrift.http.bind.host|<undefined>|Hostname or IP of the machine on which to run the thrift frontend service via http protocol.|string|1.6.0 -kyuubi.frontend.thrift.http.bind.port|10010|Port of the machine on which to run the thrift frontend service via http protocol.|int|1.6.0 -kyuubi.frontend.thrift.http.compression.enabled|true|Enable thrift http compression via Jetty compression support|boolean|1.6.0 -kyuubi.frontend.thrift.http.cookie.auth.enabled|true|When true, Kyuubi in HTTP transport mode, will use cookie based authentication mechanism|boolean|1.6.0 -kyuubi.frontend.thrift.http.cookie.domain|<undefined>|Domain for the Kyuubi generated cookies|string|1.6.0 -kyuubi.frontend.thrift.http.cookie.is.httponly|true|HttpOnly attribute of the Kyuubi generated cookie.|boolean|1.6.0 -kyuubi.frontend.thrift.http.cookie.max.age|86400|Maximum age in seconds for server side cookie used by Kyuubi in HTTP mode.|int|1.6.0 -kyuubi.frontend.thrift.http.cookie.path|<undefined>|Path for the Kyuubi generated cookies|string|1.6.0 -kyuubi.frontend.thrift.http.max.idle.time|PT30M|Maximum idle time for a connection on the server when in HTTP mode.|duration|1.6.0 -kyuubi.frontend.thrift.http.path|cliservice|Path component of URL endpoint when in HTTP mode.|string|1.6.0 -kyuubi.frontend.thrift.http.request.header.size|6144|Request header size in bytes, when using HTTP transport mode. Jetty defaults used.|int|1.6.0 -kyuubi.frontend.thrift.http.response.header.size|6144|Response header size in bytes, when using HTTP transport mode. Jetty defaults used.|int|1.6.0 -kyuubi.frontend.thrift.http.ssl.exclude.ciphersuites||A comma separated list of exclude SSL cipher suite names for thrift http frontend.|seq|1.7.0 -kyuubi.frontend.thrift.http.ssl.keystore.password|<undefined>|SSL certificate keystore password.|string|1.6.0 -kyuubi.frontend.thrift.http.ssl.keystore.path|<undefined>|SSL certificate keystore location.|string|1.6.0 -kyuubi.frontend.thrift.http.ssl.protocol.blacklist|SSLv2,SSLv3|SSL Versions to disable when using HTTP transport mode.|seq|1.6.0 -kyuubi.frontend.thrift.http.use.SSL|false|Set this to true for using SSL encryption in http mode.|boolean|1.6.0 -kyuubi.frontend.thrift.http.xsrf.filter.enabled|false|If enabled, Kyuubi will block any requests made to it over http if an X-XSRF-HEADER header is not present|boolean|1.6.0 -kyuubi.frontend.thrift.login.timeout|PT20S|Timeout for Thrift clients during login to the thrift frontend service.|duration|1.4.0 -kyuubi.frontend.thrift.max.message.size|104857600|Maximum message size in bytes a Kyuubi server will accept.|int|1.4.0 -kyuubi.frontend.thrift.max.worker.threads|999|Maximum number of threads in the of frontend worker thread pool for the thrift frontend service|int|1.4.0 -kyuubi.frontend.thrift.min.worker.threads|9|Minimum number of threads in the of frontend worker thread pool for the thrift frontend service|int|1.4.0 -kyuubi.frontend.thrift.worker.keepalive.time|PT1M|Keep-alive time (in milliseconds) for an idle worker thread|duration|1.4.0 -kyuubi.frontend.trino.bind.host|<undefined>|Hostname or IP of the machine on which to run the TRINO frontend service.|string|1.7.0 -kyuubi.frontend.trino.bind.port|10999|Port of the machine on which to run the TRINO frontend service.|int|1.7.0 -kyuubi.frontend.trino.max.worker.threads|999|Maximum number of threads in the of frontend worker thread pool for the trino frontend service|int|1.7.0 -kyuubi.frontend.worker.keepalive.time|PT1M|(deprecated) Keep-alive time (in milliseconds) for an idle worker thread|duration|1.0.0 - +| Key | Default | Meaning | Type | Since | +|--------------------------------------------------------|--------------------||----------|-------| +| kyuubi.frontend.backoff.slot.length | PT0.1S | (deprecated) Time to back off during login to the thrift frontend service. | duration | 1.0.0 | +| kyuubi.frontend.bind.host | <undefined> | Hostname or IP of the machine on which to run the frontend services. | string | 1.0.0 | +| kyuubi.frontend.bind.port | 10009 | (deprecated) Port of the machine on which to run the thrift frontend service via the binary protocol. | int | 1.0.0 | +| kyuubi.frontend.connection.url.use.hostname | true | When true, frontend services prefer hostname, otherwise, ip address. Note that, the default value is set to `false` when engine running on Kubernetes to prevent potential network issues. | boolean | 1.5.0 | +| kyuubi.frontend.login.timeout | PT20S | (deprecated) Timeout for Thrift clients during login to the thrift frontend service. | duration | 1.0.0 | +| kyuubi.frontend.max.message.size | 104857600 | (deprecated) Maximum message size in bytes a Kyuubi server will accept. | int | 1.0.0 | +| kyuubi.frontend.max.worker.threads | 999 | (deprecated) Maximum number of threads in the frontend worker thread pool for the thrift frontend service | int | 1.0.0 | +| kyuubi.frontend.min.worker.threads | 9 | (deprecated) Minimum number of threads in the frontend worker thread pool for the thrift frontend service | int | 1.0.0 | +| kyuubi.frontend.mysql.bind.host | <undefined> | Hostname or IP of the machine on which to run the MySQL frontend service. | string | 1.4.0 | +| kyuubi.frontend.mysql.bind.port | 3309 | Port of the machine on which to run the MySQL frontend service. | int | 1.4.0 | +| kyuubi.frontend.mysql.max.worker.threads | 999 | Maximum number of threads in the command execution thread pool for the MySQL frontend service | int | 1.4.0 | +| kyuubi.frontend.mysql.min.worker.threads | 9 | Minimum number of threads in the command execution thread pool for the MySQL frontend service | int | 1.4.0 | +| kyuubi.frontend.mysql.netty.worker.threads | <undefined> | Number of thread in the netty worker event loop of MySQL frontend service. Use min(cpu_cores, 8) in default. | int | 1.4.0 | +| kyuubi.frontend.mysql.worker.keepalive.time | PT1M | Time(ms) that an idle async thread of the command execution thread pool will wait for a new task to arrive before terminating in MySQL frontend service | duration | 1.4.0 | +| kyuubi.frontend.protocols | THRIFT_BINARY,REST | A comma-separated list for all frontend protocols
  • THRIFT_BINARY - HiveServer2 compatible thrift binary protocol.
  • THRIFT_HTTP - HiveServer2 compatible thrift http protocol.
  • REST - Kyuubi defined REST API(experimental).
  • MYSQL - MySQL compatible text protocol(experimental).
  • TRINO - Trino compatible http protocol(experimental).
| seq | 1.4.0 | +| kyuubi.frontend.proxy.http.client.ip.header | X-Real-IP | The HTTP header to record the real client IP address. If your server is behind a load balancer or other proxy, the server will see this load balancer or proxy IP address as the client IP address, to get around this common issue, most load balancers or proxies offer the ability to record the real remote IP address in an HTTP header that will be added to the request for other devices to use. Note that, because the header value can be specified to any IP address, so it will not be used for authentication. | string | 1.6.0 | +| kyuubi.frontend.rest.bind.host | <undefined> | Hostname or IP of the machine on which to run the REST frontend service. | string | 1.4.0 | +| kyuubi.frontend.rest.bind.port | 10099 | Port of the machine on which to run the REST frontend service. | int | 1.4.0 | +| kyuubi.frontend.rest.max.worker.threads | 999 | Maximum number of threads in the frontend worker thread pool for the rest frontend service | int | 1.6.2 | +| kyuubi.frontend.ssl.keystore.algorithm | <undefined> | SSL certificate keystore algorithm. | string | 1.7.0 | +| kyuubi.frontend.ssl.keystore.password | <undefined> | SSL certificate keystore password. | string | 1.7.0 | +| kyuubi.frontend.ssl.keystore.path | <undefined> | SSL certificate keystore location. | string | 1.7.0 | +| kyuubi.frontend.ssl.keystore.type | <undefined> | SSL certificate keystore type. | string | 1.7.0 | +| kyuubi.frontend.thrift.backoff.slot.length | PT0.1S | Time to back off during login to the thrift frontend service. | duration | 1.4.0 | +| kyuubi.frontend.thrift.binary.bind.host | <undefined> | Hostname or IP of the machine on which to run the thrift frontend service via the binary protocol. | string | 1.4.0 | +| kyuubi.frontend.thrift.binary.bind.port | 10009 | Port of the machine on which to run the thrift frontend service via the binary protocol. | int | 1.4.0 | +| kyuubi.frontend.thrift.binary.ssl.disallowed.protocols | SSLv2,SSLv3 | SSL versions to disallow for Kyuubi thrift binary frontend. | seq | 1.7.0 | +| kyuubi.frontend.thrift.binary.ssl.enabled | false | Set this to true for using SSL encryption in thrift binary frontend server. | boolean | 1.7.0 | +| kyuubi.frontend.thrift.binary.ssl.include.ciphersuites || A comma-separated list of include SSL cipher suite names for thrift binary frontend. | seq | 1.7.0 | +| kyuubi.frontend.thrift.http.allow.user.substitution | true | Allow alternate user to be specified as part of open connection request when using HTTP transport mode. | boolean | 1.6.0 | +| kyuubi.frontend.thrift.http.bind.host | <undefined> | Hostname or IP of the machine on which to run the thrift frontend service via http protocol. | string | 1.6.0 | +| kyuubi.frontend.thrift.http.bind.port | 10010 | Port of the machine on which to run the thrift frontend service via http protocol. | int | 1.6.0 | +| kyuubi.frontend.thrift.http.compression.enabled | true | Enable thrift http compression via Jetty compression support | boolean | 1.6.0 | +| kyuubi.frontend.thrift.http.cookie.auth.enabled | true | When true, Kyuubi in HTTP transport mode, will use cookie-based authentication mechanism | boolean | 1.6.0 | +| kyuubi.frontend.thrift.http.cookie.domain | <undefined> | Domain for the Kyuubi generated cookies | string | 1.6.0 | +| kyuubi.frontend.thrift.http.cookie.is.httponly | true | HttpOnly attribute of the Kyuubi generated cookie. | boolean | 1.6.0 | +| kyuubi.frontend.thrift.http.cookie.max.age | 86400 | Maximum age in seconds for server side cookie used by Kyuubi in HTTP mode. | int | 1.6.0 | +| kyuubi.frontend.thrift.http.cookie.path | <undefined> | Path for the Kyuubi generated cookies | string | 1.6.0 | +| kyuubi.frontend.thrift.http.max.idle.time | PT30M | Maximum idle time for a connection on the server when in HTTP mode. | duration | 1.6.0 | +| kyuubi.frontend.thrift.http.path | cliservice | Path component of URL endpoint when in HTTP mode. | string | 1.6.0 | +| kyuubi.frontend.thrift.http.request.header.size | 6144 | Request header size in bytes, when using HTTP transport mode. Jetty defaults used. | int | 1.6.0 | +| kyuubi.frontend.thrift.http.response.header.size | 6144 | Response header size in bytes, when using HTTP transport mode. Jetty defaults used. | int | 1.6.0 | +| kyuubi.frontend.thrift.http.ssl.exclude.ciphersuites || A comma-separated list of exclude SSL cipher suite names for thrift http frontend. | seq | 1.7.0 | +| kyuubi.frontend.thrift.http.ssl.keystore.password | <undefined> | SSL certificate keystore password. | string | 1.6.0 | +| kyuubi.frontend.thrift.http.ssl.keystore.path | <undefined> | SSL certificate keystore location. | string | 1.6.0 | +| kyuubi.frontend.thrift.http.ssl.protocol.blacklist | SSLv2,SSLv3 | SSL Versions to disable when using HTTP transport mode. | seq | 1.6.0 | +| kyuubi.frontend.thrift.http.use.SSL | false | Set this to true for using SSL encryption in http mode. | boolean | 1.6.0 | +| kyuubi.frontend.thrift.http.xsrf.filter.enabled | false | If enabled, Kyuubi will block any requests made to it over HTTP if an X-XSRF-HEADER header is not present | boolean | 1.6.0 | +| kyuubi.frontend.thrift.login.timeout | PT20S | Timeout for Thrift clients during login to the thrift frontend service. | duration | 1.4.0 | +| kyuubi.frontend.thrift.max.message.size | 104857600 | Maximum message size in bytes a Kyuubi server will accept. | int | 1.4.0 | +| kyuubi.frontend.thrift.max.worker.threads | 999 | Maximum number of threads in the frontend worker thread pool for the thrift frontend service | int | 1.4.0 | +| kyuubi.frontend.thrift.min.worker.threads | 9 | Minimum number of threads in the frontend worker thread pool for the thrift frontend service | int | 1.4.0 | +| kyuubi.frontend.thrift.worker.keepalive.time | PT1M | Keep-alive time (in milliseconds) for an idle worker thread | duration | 1.4.0 | +| kyuubi.frontend.trino.bind.host | <undefined> | Hostname or IP of the machine on which to run the TRINO frontend service. | string | 1.7.0 | +| kyuubi.frontend.trino.bind.port | 10999 | Port of the machine on which to run the TRINO frontend service. | int | 1.7.0 | +| kyuubi.frontend.trino.max.worker.threads | 999 | Maximum number of threads in the frontend worker thread pool for the Trino frontend service | int | 1.7.0 | +| kyuubi.frontend.worker.keepalive.time | PT1M | (deprecated) Keep-alive time (in milliseconds) for an idle worker thread | duration | 1.0.0 | ### Ha -Key | Default | Meaning | Type | Since ---- | --- | --- | --- | --- -kyuubi.ha.addresses||The connection string for the discovery ensemble|string|1.6.0 -kyuubi.ha.client.class|org.apache.kyuubi.ha.client.zookeeper.ZookeeperDiscoveryClient|Class name for service discovery client.
  • Zookeeper: org.apache.kyuubi.ha.client.zookeeper.ZookeeperDiscoveryClient
  • Etcd: org.apache.kyuubi.ha.client.etcd.EtcdDiscoveryClient
|string|1.6.0 -kyuubi.ha.etcd.lease.timeout|PT10S|Timeout for etcd keep alive lease. The kyuubi server will known unexpected loss of engine after up to this seconds.|duration|1.6.0 -kyuubi.ha.etcd.ssl.ca.path|<undefined>|Where the etcd CA certificate file is stored.|string|1.6.0 -kyuubi.ha.etcd.ssl.client.certificate.path|<undefined>|Where the etcd SSL certificate file is stored.|string|1.6.0 -kyuubi.ha.etcd.ssl.client.key.path|<undefined>|Where the etcd SSL key file is stored.|string|1.6.0 -kyuubi.ha.etcd.ssl.enabled|false|When set to true, will build a ssl secured etcd client.|boolean|1.6.0 -kyuubi.ha.namespace|kyuubi|The root directory for the service to deploy its instance uri|string|1.6.0 -kyuubi.ha.zookeeper.acl.enabled|false|Set to true if the zookeeper ensemble is kerberized|boolean|1.0.0 -kyuubi.ha.zookeeper.auth.digest|<undefined>|The digest auth string is used for zookeeper authentication, like: username:password.|string|1.3.2 -kyuubi.ha.zookeeper.auth.keytab|<undefined>|Location of Kyuubi server's keytab is used for zookeeper authentication.|string|1.3.2 -kyuubi.ha.zookeeper.auth.principal|<undefined>|Name of the Kerberos principal is used for zookeeper authentication.|string|1.3.2 -kyuubi.ha.zookeeper.auth.type|NONE|The type of zookeeper authentication, all candidates are
  • NONE
  • KERBEROS
  • DIGEST
|string|1.3.2 -kyuubi.ha.zookeeper.connection.base.retry.wait|1000|Initial amount of time to wait between retries to the zookeeper ensemble|int|1.0.0 -kyuubi.ha.zookeeper.connection.max.retries|3|Max retry times for connecting to the zookeeper ensemble|int|1.0.0 -kyuubi.ha.zookeeper.connection.max.retry.wait|30000|Max amount of time to wait between retries for BOUNDED_EXPONENTIAL_BACKOFF policy can reach, or max time until elapsed for UNTIL_ELAPSED policy to connect the zookeeper ensemble|int|1.0.0 -kyuubi.ha.zookeeper.connection.retry.policy|EXPONENTIAL_BACKOFF|The retry policy for connecting to the zookeeper ensemble, all candidates are:
  • ONE_TIME
  • N_TIME
  • EXPONENTIAL_BACKOFF
  • BOUNDED_EXPONENTIAL_BACKOFF
  • UNTIL_ELAPSED
|string|1.0.0 -kyuubi.ha.zookeeper.connection.timeout|15000|The timeout(ms) of creating the connection to the zookeeper ensemble|int|1.0.0 -kyuubi.ha.zookeeper.engine.auth.type|NONE|The type of zookeeper authentication for engine, all candidates are
  • NONE
  • KERBEROS
  • DIGEST
|string|1.3.2 -kyuubi.ha.zookeeper.namespace|kyuubi|(deprecated) The root directory for the service to deploy its instance uri|string|1.0.0 -kyuubi.ha.zookeeper.node.creation.timeout|PT2M|Timeout for creating zookeeper node|duration|1.2.0 -kyuubi.ha.zookeeper.publish.configs|false|When set to true, publish Kerberos configs to Zookeeper.Note that the Hive driver needs to be greater than 1.3 or 2.0 or apply HIVE-11581 patch.|boolean|1.4.0 -kyuubi.ha.zookeeper.quorum||(deprecated) The connection string for the zookeeper ensemble|string|1.0.0 -kyuubi.ha.zookeeper.session.timeout|60000|The timeout(ms) of a connected session to be idled|int|1.0.0 - +| Key | Default | Meaning | Type | Since | +|------------------------------------------------|----------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------|-------| +| kyuubi.ha.addresses || The connection string for the discovery ensemble | string | 1.6.0 | +| kyuubi.ha.client.class | org.apache.kyuubi.ha.client.zookeeper.ZookeeperDiscoveryClient | Class name for service discovery client.
  • Zookeeper: org.apache.kyuubi.ha.client.zookeeper.ZookeeperDiscoveryClient
  • Etcd: org.apache.kyuubi.ha.client.etcd.EtcdDiscoveryClient
| string | 1.6.0 | +| kyuubi.ha.etcd.lease.timeout | PT10S | Timeout for etcd keep alive lease. The kyuubi server will know the unexpected loss of engine after up to this seconds. | duration | 1.6.0 | +| kyuubi.ha.etcd.ssl.ca.path | <undefined> | Where the etcd CA certificate file is stored. | string | 1.6.0 | +| kyuubi.ha.etcd.ssl.client.certificate.path | <undefined> | Where the etcd SSL certificate file is stored. | string | 1.6.0 | +| kyuubi.ha.etcd.ssl.client.key.path | <undefined> | Where the etcd SSL key file is stored. | string | 1.6.0 | +| kyuubi.ha.etcd.ssl.enabled | false | When set to true, will build an SSL secured etcd client. | boolean | 1.6.0 | +| kyuubi.ha.namespace | kyuubi | The root directory for the service to deploy its instance uri | string | 1.6.0 | +| kyuubi.ha.zookeeper.acl.enabled | false | Set to true if the ZooKeeper ensemble is kerberized | boolean | 1.0.0 | +| kyuubi.ha.zookeeper.auth.digest | <undefined> | The digest auth string is used for ZooKeeper authentication, like: username:password. | string | 1.3.2 | +| kyuubi.ha.zookeeper.auth.keytab | <undefined> | Location of the Kyuubi server's keytab is used for ZooKeeper authentication. | string | 1.3.2 | +| kyuubi.ha.zookeeper.auth.principal | <undefined> | Name of the Kerberos principal is used for ZooKeeper authentication. | string | 1.3.2 | +| kyuubi.ha.zookeeper.auth.type | NONE | The type of ZooKeeper authentication, all candidates are
  • NONE
  • KERBEROS
  • DIGEST
| string | 1.3.2 | +| kyuubi.ha.zookeeper.connection.base.retry.wait | 1000 | Initial amount of time to wait between retries to the ZooKeeper ensemble | int | 1.0.0 | +| kyuubi.ha.zookeeper.connection.max.retries | 3 | Max retry times for connecting to the ZooKeeper ensemble | int | 1.0.0 | +| kyuubi.ha.zookeeper.connection.max.retry.wait | 30000 | Max amount of time to wait between retries for BOUNDED_EXPONENTIAL_BACKOFF policy can reach, or max time until elapsed for UNTIL_ELAPSED policy to connect the zookeeper ensemble | int | 1.0.0 | +| kyuubi.ha.zookeeper.connection.retry.policy | EXPONENTIAL_BACKOFF | The retry policy for connecting to the ZooKeeper ensemble, all candidates are:
  • ONE_TIME
  • N_TIME
  • EXPONENTIAL_BACKOFF
  • BOUNDED_EXPONENTIAL_BACKOFF
  • UNTIL_ELAPSED
| string | 1.0.0 | +| kyuubi.ha.zookeeper.connection.timeout | 15000 | The timeout(ms) of creating the connection to the ZooKeeper ensemble | int | 1.0.0 | +| kyuubi.ha.zookeeper.engine.auth.type | NONE | The type of ZooKeeper authentication for the engine, all candidates are
  • NONE
  • KERBEROS
  • DIGEST
| string | 1.3.2 | +| kyuubi.ha.zookeeper.namespace | kyuubi | (deprecated) The root directory for the service to deploy its instance uri | string | 1.0.0 | +| kyuubi.ha.zookeeper.node.creation.timeout | PT2M | Timeout for creating ZooKeeper node | duration | 1.2.0 | +| kyuubi.ha.zookeeper.publish.configs | false | When set to true, publish Kerberos configs to Zookeeper. Note that the Hive driver needs to be greater than 1.3 or 2.0 or apply HIVE-11581 patch. | boolean | 1.4.0 | +| kyuubi.ha.zookeeper.quorum || (deprecated) The connection string for the ZooKeeper ensemble | string | 1.0.0 | +| kyuubi.ha.zookeeper.session.timeout | 60000 | The timeout(ms) of a connected session to be idled | int | 1.0.0 | ### Kinit -Key | Default | Meaning | Type | Since ---- | --- | --- | --- | --- -kyuubi.kinit.interval|PT1H|How often will Kyuubi server run `kinit -kt [keytab] [principal]` to renew the local Kerberos credentials cache|duration|1.0.0 -kyuubi.kinit.keytab|<undefined>|Location of Kyuubi server's keytab.|string|1.0.0 -kyuubi.kinit.max.attempts|10|How many times will `kinit` process retry|int|1.0.0 -kyuubi.kinit.principal|<undefined>|Name of the Kerberos principal.|string|1.0.0 - +| Key | Default | Meaning | Type | Since | +|---------------------------|-------------------|---------------------------------------------------------------------------------------------------------------------|----------|-------| +| kyuubi.kinit.interval | PT1H | How often will the Kyuubi server run `kinit -kt [keytab] [principal]` to renew the local Kerberos credentials cache | duration | 1.0.0 | +| kyuubi.kinit.keytab | <undefined> | Location of Kyuubi server's keytab. | string | 1.0.0 | +| kyuubi.kinit.max.attempts | 10 | How many times will `kinit` process retry | int | 1.0.0 | +| kyuubi.kinit.principal | <undefined> | Name of the Kerberos principal. | string | 1.0.0 | ### Kubernetes -Key | Default | Meaning | Type | Since ---- | --- | --- | --- | --- -kyuubi.kubernetes.authenticate.caCertFile|<undefined>|Path to the CA cert file for connecting to the Kubernetes API server over TLS from the kyuubi. Specify this as a path as opposed to a URI (i.e. do not provide a scheme)|string|1.7.0 -kyuubi.kubernetes.authenticate.clientCertFile|<undefined>|Path to the client cert file for connecting to the Kubernetes API server over TLS from the kyuubi. Specify this as a path as opposed to a URI (i.e. do not provide a scheme)|string|1.7.0 -kyuubi.kubernetes.authenticate.clientKeyFile|<undefined>|Path to the client key file for connecting to the Kubernetes API server over TLS from the kyuubi. Specify this as a path as opposed to a URI (i.e. do not provide a scheme)|string|1.7.0 -kyuubi.kubernetes.authenticate.oauthToken|<undefined>|The OAuth token to use when authenticating against the Kubernetes API server. Note that unlike the other authentication options, this must be the exact string value of the token to use for the authentication.|string|1.7.0 -kyuubi.kubernetes.authenticate.oauthTokenFile|<undefined>|Path to the file containing the OAuth token to use when authenticating against the Kubernetes API server. Specify this as a path as opposed to a URI (i.e. do not provide a scheme)|string|1.7.0 -kyuubi.kubernetes.context|<undefined>|The desired context from your kubernetes config file used to configure the K8S client for interacting with the cluster.|string|1.6.0 -kyuubi.kubernetes.master.address|<undefined>|The internal Kubernetes master (API server) address to be used for kyuubi.|string|1.7.0 -kyuubi.kubernetes.namespace|default|The namespace that will be used for running the kyuubi pods and find engines.|string|1.7.0 -kyuubi.kubernetes.trust.certificates|false|If set to true then client can submit to kubernetes cluster only with token|boolean|1.7.0 - +| Key | Default | Meaning | Type | Since | +|-----------------------------------------------------|-------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------|-------| +| kyuubi.kubernetes.authenticate.caCertFile | <undefined> | Path to the CA cert file for connecting to the Kubernetes API server over TLS from the kyuubi. Specify this as a path as opposed to a URI (i.e. do not provide a scheme) | string | 1.7.0 | +| kyuubi.kubernetes.authenticate.clientCertFile | <undefined> | Path to the client cert file for connecting to the Kubernetes API server over TLS from the kyuubi. Specify this as a path as opposed to a URI (i.e. do not provide a scheme) | string | 1.7.0 | +| kyuubi.kubernetes.authenticate.clientKeyFile | <undefined> | Path to the client key file for connecting to the Kubernetes API server over TLS from the kyuubi. Specify this as a path as opposed to a URI (i.e. do not provide a scheme) | string | 1.7.0 | +| kyuubi.kubernetes.authenticate.oauthToken | <undefined> | The OAuth token to use when authenticating against the Kubernetes API server. Note that unlike, the other authentication options, this must be the exact string value of the token to use for the authentication. | string | 1.7.0 | +| kyuubi.kubernetes.authenticate.oauthTokenFile | <undefined> | Path to the file containing the OAuth token to use when authenticating against the Kubernetes API server. Specify this as a path as opposed to a URI (i.e. do not provide a scheme) | string | 1.7.0 | +| kyuubi.kubernetes.context | <undefined> | The desired context from your kubernetes config file used to configure the K8s client for interacting with the cluster. | string | 1.6.0 | +| kyuubi.kubernetes.master.address | <undefined> | The internal Kubernetes master (API server) address to be used for kyuubi. | string | 1.7.0 | +| kyuubi.kubernetes.namespace | default | The namespace that will be used for running the kyuubi pods and find engines. | string | 1.7.0 | +| kyuubi.kubernetes.terminatedApplicationRetainPeriod | PT5M | The period for which the Kyuubi server retains application information after the application terminates. | duration | 1.7.1 | +| kyuubi.kubernetes.trust.certificates | false | If set to true then client can submit to kubernetes cluster only with token | boolean | 1.7.0 | ### Metadata -Key | Default | Meaning | Type | Since ---- | --- | --- | --- | --- -kyuubi.metadata.cleaner.enabled|true|Whether to clean the metadata periodically. If it is enabled, Kyuubi will clean the metadata that is in terminate state with max age limitation.|boolean|1.6.0 -kyuubi.metadata.cleaner.interval|PT30M|The interval to check and clean expired metadata.|duration|1.6.0 -kyuubi.metadata.max.age|PT72H|The maximum age of metadata, the metadata that exceeds the age will be cleaned.|duration|1.6.0 -kyuubi.metadata.recovery.threads|10|The number of threads for recovery from metadata store when Kyuubi server restarting.|int|1.6.0 -kyuubi.metadata.request.retry.interval|PT5S|The interval to check and trigger the metadata request retry tasks.|duration|1.6.0 -kyuubi.metadata.request.retry.queue.size|65536|The maximum queue size for buffering metadata requests in memory when the external metadata storage is down. Requests will be dropped if the queue exceeds.|int|1.6.0 -kyuubi.metadata.request.retry.threads|10|Number of threads in the metadata request retry manager thread pool. The metadata store might be unavailable sometimes and the requests will fail, to tolerant for this case and unblock the main thread, we support to retry the failed requests in async way.|int|1.6.0 -kyuubi.metadata.store.class|org.apache.kyuubi.server.metadata.jdbc.JDBCMetadataStore|Fully qualified class name for server metadata store.|string|1.6.0 -kyuubi.metadata.store.jdbc.database.schema.init|true|Whether to init the jdbc metadata store database schema.|boolean|1.6.0 -kyuubi.metadata.store.jdbc.database.type|DERBY|The database type for server jdbc metadata store.
  • DERBY: Apache Derby, jdbc driver `org.apache.derby.jdbc.AutoloadedDriver`.
  • MYSQL: MySQL, jdbc driver `com.mysql.jdbc.Driver`.
  • CUSTOM: User-defined database type, need to specify corresponding jdbc driver.
  • Note that: The jdbc datasource is powered by HiKariCP, for datasource properties, please specify them with prefix: kyuubi.metadata.store.jdbc.datasource. For example, kyuubi.metadata.store.jdbc.datasource.connectionTimeout=10000.|string|1.6.0 -kyuubi.metadata.store.jdbc.driver|<undefined>|JDBC driver class name for server jdbc metadata store.|string|1.6.0 -kyuubi.metadata.store.jdbc.password||The password for server jdbc metadata store.|string|1.6.0 -kyuubi.metadata.store.jdbc.url|jdbc:derby:memory:kyuubi_state_store_db;create=true|The jdbc url for server jdbc metadata store. By defaults, it is a DERBY in-memory database url, and the state information is not shared across kyuubi instances. To enable multiple kyuubi instances high available, please specify a production jdbc url.|string|1.6.0 -kyuubi.metadata.store.jdbc.user||The username for server jdbc metadata store.|string|1.6.0 - +| Key | Default | Meaning | Type | Since | +|-------------------------------------------------|----------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------|-------| +| kyuubi.metadata.cleaner.enabled | true | Whether to clean the metadata periodically. If it is enabled, Kyuubi will clean the metadata that is in the terminate state with max age limitation. | boolean | 1.6.0 | +| kyuubi.metadata.cleaner.interval | PT30M | The interval to check and clean expired metadata. | duration | 1.6.0 | +| kyuubi.metadata.max.age | PT72H | The maximum age of metadata, the metadata exceeding the age will be cleaned. | duration | 1.6.0 | +| kyuubi.metadata.recovery.threads | 10 | The number of threads for recovery from the metadata store when the Kyuubi server restarts. | int | 1.6.0 | +| kyuubi.metadata.request.async.retry.enabled | true | Whether to retry in async when metadata request failed. When true, return success response immediately even the metadata request failed, and schedule it in background until success, to tolerate long-time metadata store outages w/o blocking the submission request. | boolean | 1.7.0 | +| kyuubi.metadata.request.async.retry.queue.size | 65536 | The maximum queue size for buffering metadata requests in memory when the external metadata storage is down. Requests will be dropped if the queue exceeds. Only take affect when kyuubi.metadata.request.async.retry.enabled is `true`. | int | 1.6.0 | +| kyuubi.metadata.request.async.retry.threads | 10 | Number of threads in the metadata request async retry manager thread pool. Only take affect when kyuubi.metadata.request.async.retry.enabled is `true`. | int | 1.6.0 | +| kyuubi.metadata.request.retry.interval | PT5S | The interval to check and trigger the metadata request retry tasks. | duration | 1.6.0 | +| kyuubi.metadata.store.class | org.apache.kyuubi.server.metadata.jdbc.JDBCMetadataStore | Fully qualified class name for server metadata store. | string | 1.6.0 | +| kyuubi.metadata.store.jdbc.database.schema.init | true | Whether to init the JDBC metadata store database schema. | boolean | 1.6.0 | +| kyuubi.metadata.store.jdbc.database.type | DERBY | The database type for server jdbc metadata store.
    • DERBY: Apache Derby, JDBC driver `org.apache.derby.jdbc.AutoloadedDriver`.
    • MYSQL: MySQL, JDBC driver `com.mysql.jdbc.Driver`.
    • CUSTOM: User-defined database type, need to specify corresponding JDBC driver.
    • Note that: The JDBC datasource is powered by HiKariCP, for datasource properties, please specify them with the prefix: kyuubi.metadata.store.jdbc.datasource. For example, kyuubi.metadata.store.jdbc.datasource.connectionTimeout=10000. | string | 1.6.0 | +| kyuubi.metadata.store.jdbc.driver | <undefined> | JDBC driver class name for server jdbc metadata store. | string | 1.6.0 | +| kyuubi.metadata.store.jdbc.password || The password for server JDBC metadata store. | string | 1.6.0 | +| kyuubi.metadata.store.jdbc.url | jdbc:derby:memory:kyuubi_state_store_db;create=true | The JDBC url for server JDBC metadata store. By default, it is a DERBY in-memory database url, and the state information is not shared across kyuubi instances. To enable high availability for multiple kyuubi instances, please specify a production JDBC url. | string | 1.6.0 | +| kyuubi.metadata.store.jdbc.user || The username for server JDBC metadata store. | string | 1.6.0 | ### Metrics -Key | Default | Meaning | Type | Since ---- | --- | --- | --- | --- -kyuubi.metrics.console.interval|PT5S|How often should report metrics to console|duration|1.2.0 -kyuubi.metrics.enabled|true|Set to true to enable kyuubi metrics system|boolean|1.2.0 -kyuubi.metrics.json.interval|PT5S|How often should report metrics to json file|duration|1.2.0 -kyuubi.metrics.json.location|metrics|Where the json metrics file located|string|1.2.0 -kyuubi.metrics.prometheus.path|/metrics|URI context path of prometheus metrics HTTP server|string|1.2.0 -kyuubi.metrics.prometheus.port|10019|Prometheus metrics HTTP server port|int|1.2.0 -kyuubi.metrics.reporters|JSON|A comma separated list for all metrics reporters
      • CONSOLE - ConsoleReporter which outputs measurements to CONSOLE periodically.
      • JMX - JmxReporter which listens for new metrics and exposes them as MBeans.
      • JSON - JsonReporter which outputs measurements to json file periodically.
      • PROMETHEUS - PrometheusReporter which exposes metrics in prometheus format.
      • SLF4J - Slf4jReporter which outputs measurements to system log periodically.
      |seq|1.2.0 -kyuubi.metrics.slf4j.interval|PT5S|How often should report metrics to SLF4J logger|duration|1.2.0 - +| Key | Default | Meaning | Type | Since | +|---------------------------------|----------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------|-------| +| kyuubi.metrics.console.interval | PT5S | How often should report metrics to console | duration | 1.2.0 | +| kyuubi.metrics.enabled | true | Set to true to enable kyuubi metrics system | boolean | 1.2.0 | +| kyuubi.metrics.json.interval | PT5S | How often should report metrics to JSON file | duration | 1.2.0 | +| kyuubi.metrics.json.location | metrics | Where the JSON metrics file located | string | 1.2.0 | +| kyuubi.metrics.prometheus.path | /metrics | URI context path of prometheus metrics HTTP server | string | 1.2.0 | +| kyuubi.metrics.prometheus.port | 10019 | Prometheus metrics HTTP server port | int | 1.2.0 | +| kyuubi.metrics.reporters | JSON | A comma-separated list for all metrics reporters
      • CONSOLE - ConsoleReporter which outputs measurements to CONSOLE periodically.
      • JMX - JmxReporter which listens for new metrics and exposes them as MBeans.
      • JSON - JsonReporter which outputs measurements to json file periodically.
      • PROMETHEUS - PrometheusReporter which exposes metrics in Prometheus format.
      • SLF4J - Slf4jReporter which outputs measurements to system log periodically.
      | seq | 1.2.0 | +| kyuubi.metrics.slf4j.interval | PT5S | How often should report metrics to SLF4J logger | duration | 1.2.0 | ### Operation -Key | Default | Meaning | Type | Since ---- | --- | --- | --- | --- -kyuubi.operation.idle.timeout|PT3H|Operation will be closed when it's not accessed for this duration of time|duration|1.0.0 -kyuubi.operation.interrupt.on.cancel|true|When true, all running tasks will be interrupted if one cancels a query. When false, all running tasks will remain until finished.|boolean|1.2.0 -kyuubi.operation.language|SQL|Choose a programing language for the following inputs
      • SQL: (Default) Run all following statements as SQL queries.
      • SCALA: Run all following input a scala codes
      |string|1.5.0 -kyuubi.operation.log.dir.root|server_operation_logs|Root directory for query operation log at server-side.|string|1.4.0 -kyuubi.operation.plan.only.excludes|ResetCommand,SetCommand,SetNamespaceCommand,UseStatement,SetCatalogAndNamespace|Comma-separated list of query plan names, in the form of simple class names, i.e, for `set abc=xyz`, the value will be `SetCommand`. For those auxiliary plans, such as `switch databases`, `set properties`, or `create temporary view` e.t.c, which are used for setup evaluating environments for analyzing actual queries, we can use this config to exclude them and let them take effect. See also kyuubi.operation.plan.only.mode.|seq|1.5.0 -kyuubi.operation.plan.only.mode|none|Configures the statement performed mode, The value can be 'parse', 'analyze', 'optimize', 'optimize_with_stats', 'physical', 'execution', or 'none', when it is 'none', indicate to the statement will be fully executed, otherwise only way without executing the query. different engines currently support different modes, the Spark engine supports all modes, and the Flink engine supports 'parse', 'physical', and 'execution', other engines do not support planOnly currently.|string|1.4.0 -kyuubi.operation.plan.only.output.style|plain|Configures the planOnly output style, The value can be 'plain' and 'json', default value is 'plain', this configuration supports only the output styles of the Spark engine|string|1.7.0 -kyuubi.operation.progress.enabled|false|Whether to enable the operation progress. When true, the operation progress will be returned in `GetOperationStatus`.|boolean|1.6.0 -kyuubi.operation.query.timeout|<undefined>|Timeout for query executions at server-side, take affect with client-side timeout(`java.sql.Statement.setQueryTimeout`) together, a running query will be cancelled automatically if timeout. It's off by default, which means only client-side take fully control whether the query should timeout or not. If set, client-side timeout capped at this point. To cancel the queries right away without waiting task to finish, consider enabling kyuubi.operation.interrupt.on.cancel together.|duration|1.2.0 -kyuubi.operation.result.format|thrift|Specify the result format, available configs are:
      • THRIFT: the result will convert to TRow at the engine driver side.
      • ARROW: the result will be encoded as Arrow at the executor side before collecting by the driver, and deserialized at the client side. note that it only takes effect for kyuubi-hive-jdbc clients now.
      |string|1.7.0 -kyuubi.operation.result.max.rows|0|Max rows of Spark query results. Rows that exceeds the limit would be ignored. By setting this value to 0 to disable the max rows limit.|int|1.6.0 -kyuubi.operation.scheduler.pool|<undefined>|The scheduler pool of job. Note that, this config should be used after change Spark config spark.scheduler.mode=FAIR.|string|1.1.1 -kyuubi.operation.spark.listener.enabled|true|When set to true, Spark engine registers a SQLOperationListener before executing the statement, logs a few summary statistics when each stage completes.|boolean|1.6.0 -kyuubi.operation.status.polling.timeout|PT5S|Timeout(ms) for long polling asynchronous running sql query's status|duration|1.0.0 - +| Key | Default | Meaning | Type | Since | +|--------------------------------------------------|---------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------|-------| +| kyuubi.operation.getTables.ignoreTableProperties | false | Speed up the `GetTables` operation by returning table identities only. | boolean | 1.8.0 | +| kyuubi.operation.idle.timeout | PT3H | Operation will be closed when it's not accessed for this duration of time | duration | 1.0.0 | +| kyuubi.operation.interrupt.on.cancel | true | When true, all running tasks will be interrupted if one cancels a query. When false, all running tasks will remain until finished. | boolean | 1.2.0 | +| kyuubi.operation.language | SQL | Choose a programing language for the following inputs
      • SQL: (Default) Run all following statements as SQL queries.
      • SCALA: Run all following input as scala codes
      • PYTHON: (Experimental) Run all following input as Python codes with Spark engine
      | string | 1.5.0 | +| kyuubi.operation.log.dir.root | server_operation_logs | Root directory for query operation log at server-side. | string | 1.4.0 | +| kyuubi.operation.plan.only.excludes | ResetCommand,SetCommand,SetNamespaceCommand,UseStatement,SetCatalogAndNamespace | Comma-separated list of query plan names, in the form of simple class names, i.e, for `SET abc=xyz`, the value will be `SetCommand`. For those auxiliary plans, such as `switch databases`, `set properties`, or `create temporary view` etc., which are used for setup evaluating environments for analyzing actual queries, we can use this config to exclude them and let them take effect. See also kyuubi.operation.plan.only.mode. | seq | 1.5.0 | +| kyuubi.operation.plan.only.mode | none | Configures the statement performed mode, The value can be 'parse', 'analyze', 'optimize', 'optimize_with_stats', 'physical', 'execution', or 'none', when it is 'none', indicate to the statement will be fully executed, otherwise only way without executing the query. different engines currently support different modes, the Spark engine supports all modes, and the Flink engine supports 'parse', 'physical', and 'execution', other engines do not support planOnly currently. | string | 1.4.0 | +| kyuubi.operation.plan.only.output.style | plain | Configures the planOnly output style. The value can be 'plain' or 'json', and the default value is 'plain'. This configuration supports only the output styles of the Spark engine | string | 1.7.0 | +| kyuubi.operation.progress.enabled | false | Whether to enable the operation progress. When true, the operation progress will be returned in `GetOperationStatus`. | boolean | 1.6.0 | +| kyuubi.operation.query.timeout | <undefined> | Timeout for query executions at server-side, take effect with client-side timeout(`java.sql.Statement.setQueryTimeout`) together, a running query will be cancelled automatically if timeout. It's off by default, which means only client-side take full control of whether the query should timeout or not. If set, client-side timeout is capped at this point. To cancel the queries right away without waiting for task to finish, consider enabling kyuubi.operation.interrupt.on.cancel together. | duration | 1.2.0 | +| kyuubi.operation.result.arrow.timestampAsString | false | When true, arrow-based rowsets will convert columns of type timestamp to strings for transmission. | boolean | 1.7.0 | +| kyuubi.operation.result.format | thrift | Specify the result format, available configs are:
      • THRIFT: the result will convert to TRow at the engine driver side.
      • ARROW: the result will be encoded as Arrow at the executor side before collecting by the driver, and deserialized at the client side. note that it only takes effect for kyuubi-hive-jdbc clients now.
      | string | 1.7.0 | +| kyuubi.operation.result.max.rows | 0 | Max rows of Spark query results. Rows exceeding the limit would be ignored. By setting this value to 0 to disable the max rows limit. | int | 1.6.0 | +| kyuubi.operation.scheduler.pool | <undefined> | The scheduler pool of job. Note that, this config should be used after changing Spark config spark.scheduler.mode=FAIR. | string | 1.1.1 | +| kyuubi.operation.spark.listener.enabled | true | When set to true, Spark engine registers an SQLOperationListener before executing the statement, logging a few summary statistics when each stage completes. | boolean | 1.6.0 | +| kyuubi.operation.status.polling.timeout | PT5S | Timeout(ms) for long polling asynchronous running sql query's status | duration | 1.0.0 | ### Server -Key | Default | Meaning | Type | Since ---- | --- | --- | --- | --- -kyuubi.server.batch.limit.connections.per.ipaddress|<undefined>|Maximum kyuubi server batch connections per ipaddress. Any user exceeding this limit will not be allowed to connect.|int|1.7.0 -kyuubi.server.batch.limit.connections.per.user|<undefined>|Maximum kyuubi server batch connections per user. Any user exceeding this limit will not be allowed to connect.|int|1.7.0 -kyuubi.server.batch.limit.connections.per.user.ipaddress|<undefined>|Maximum kyuubi server batch connections per user:ipaddress combination. Any user-ipaddress exceeding this limit will not be allowed to connect.|int|1.7.0 -kyuubi.server.info.provider|ENGINE|The server information provider name, some clients may rely on this information to check the server compatibilities and functionalities.
    • SERVER: Return Kyuubi server information.
    • ENGINE: Return Kyuubi engine information.
    • |string|1.6.1 -kyuubi.server.limit.connections.per.ipaddress|<undefined>|Maximum kyuubi server connections per ipaddress. Any user exceeding this limit will not be allowed to connect.|int|1.6.0 -kyuubi.server.limit.connections.per.user|<undefined>|Maximum kyuubi server connections per user. Any user exceeding this limit will not be allowed to connect.|int|1.6.0 -kyuubi.server.limit.connections.per.user.ipaddress|<undefined>|Maximum kyuubi server connections per user:ipaddress combination. Any user-ipaddress exceeding this limit will not be allowed to connect.|int|1.6.0 -kyuubi.server.limit.connections.user.unlimited.list||The maximin connections of the user in the white list will not be limited.|seq|1.7.0 -kyuubi.server.name|<undefined>|The name of Kyuubi Server.|string|1.5.0 -kyuubi.server.redaction.regex|<undefined>|Regex to decide which Kyuubi contain sensitive information. When this regex matches a property key or value, the value is redacted from the various logs.||1.6.0 - +| Key | Default | Meaning | Type | Since | +|----------------------------------------------------------|-------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------|-------| +| kyuubi.server.administrators || Comma-separated list of Kyuubi service administrators. We use this config to grant admin permission to any service accounts. | seq | 1.8.0 | +| kyuubi.server.info.provider | ENGINE | The server information provider name, some clients may rely on this information to check the server compatibilities and functionalities.
    • SERVER: Return Kyuubi server information.
    • ENGINE: Return Kyuubi engine information.
    • | string | 1.6.1 | +| kyuubi.server.limit.batch.connections.per.ipaddress | <undefined> | Maximum kyuubi server batch connections per ipaddress. Any user exceeding this limit will not be allowed to connect. | int | 1.7.0 | +| kyuubi.server.limit.batch.connections.per.user | <undefined> | Maximum kyuubi server batch connections per user. Any user exceeding this limit will not be allowed to connect. | int | 1.7.0 | +| kyuubi.server.limit.batch.connections.per.user.ipaddress | <undefined> | Maximum kyuubi server batch connections per user:ipaddress combination. Any user-ipaddress exceeding this limit will not be allowed to connect. | int | 1.7.0 | +| kyuubi.server.limit.client.fetch.max.rows | <undefined> | Max rows limit for getting result row set operation. If the max rows specified by client-side is larger than the limit, request will fail directly. | int | 1.8.0 | +| kyuubi.server.limit.connections.per.ipaddress | <undefined> | Maximum kyuubi server connections per ipaddress. Any user exceeding this limit will not be allowed to connect. | int | 1.6.0 | +| kyuubi.server.limit.connections.per.user | <undefined> | Maximum kyuubi server connections per user. Any user exceeding this limit will not be allowed to connect. | int | 1.6.0 | +| kyuubi.server.limit.connections.per.user.ipaddress | <undefined> | Maximum kyuubi server connections per user:ipaddress combination. Any user-ipaddress exceeding this limit will not be allowed to connect. | int | 1.6.0 | +| kyuubi.server.limit.connections.user.unlimited.list || The maximum connections of the user in the white list will not be limited. | seq | 1.7.0 | +| kyuubi.server.name | <undefined> | The name of Kyuubi Server. | string | 1.5.0 | +| kyuubi.server.periodicGC.interval | PT30M | How often to trigger a garbage collection. | duration | 1.7.0 | +| kyuubi.server.redaction.regex | <undefined> | Regex to decide which Kyuubi contain sensitive information. When this regex matches a property key or value, the value is redacted from the various logs. || 1.6.0 | ### Session -Key | Default | Meaning | Type | Since ---- | --- | --- | --- | --- -kyuubi.session.check.interval|PT5M|The check interval for session timeout.|duration|1.0.0 -kyuubi.session.conf.advisor|<undefined>|A config advisor plugin for Kyuubi Server. This plugin can provide some custom configs for different user or session configs and overwrite the session configs before open a new session. This config value should be a class which is a child of 'org.apache.kyuubi.plugin.SessionConfAdvisor' which has zero-arg constructor.|string|1.5.0 -kyuubi.session.conf.file.reload.interval|PT10M|When `FileSessionConfAdvisor` is used, this configuration defines the expired time of `$KYUUBI_CONF_DIR/kyuubi-session-.conf` in the cache. After exceeding this value, the file will be reloaded.|duration|1.7.0 -kyuubi.session.conf.ignore.list||A comma separated list of ignored keys. If the client connection contains any of them, the key and the corresponding value will be removed silently during engine bootstrap and connection setup. Note that this rule is for server-side protection defined via administrators to prevent some essential configs from tampering but will not forbid users to set dynamic configurations via SET syntax.|seq|1.2.0 -kyuubi.session.conf.profile|<undefined>|Specify a profile to load session-level configurations from `$KYUUBI_CONF_DIR/kyuubi-session-.conf`. This configuration will be ignored if the file does not exist. This configuration only has effect when `kyuubi.session.conf.advisor` is set as `org.apache.kyuubi.session.FileSessionConfAdvisor`.|string|1.7.0 -kyuubi.session.conf.restrict.list||A comma separated list of restricted keys. If the client connection contains any of them, the connection will be rejected explicitly during engine bootstrap and connection setup. Note that this rule is for server-side protection defined via administrators to prevent some essential configs from tampering but will not forbid users to set dynamic configurations via SET syntax.|seq|1.2.0 -kyuubi.session.engine.alive.probe.enabled|false|Whether to enable the engine alive probe, it true, we will create a companion thrift client that sends simple request to check whether the engine is keep alive.|boolean|1.6.0 -kyuubi.session.engine.alive.probe.interval|PT10S|The interval for engine alive probe.|duration|1.6.0 -kyuubi.session.engine.alive.timeout|PT2M|The timeout for engine alive. If there is no alive probe success in the last timeout window, the engine will be marked as no-alive.|duration|1.6.0 -kyuubi.session.engine.check.interval|PT1M|The check interval for engine timeout|duration|1.0.0 -kyuubi.session.engine.flink.main.resource|<undefined>|The package used to create Flink SQL engine remote job. If it is undefined, Kyuubi will use the default|string|1.4.0 -kyuubi.session.engine.flink.max.rows|1000000|Max rows of Flink query results. For batch queries, rows that exceeds the limit would be ignored. For streaming queries, the query would be canceled if the limit is reached.|int|1.5.0 -kyuubi.session.engine.hive.main.resource|<undefined>|The package used to create Hive engine remote job. If it is undefined, Kyuubi will use the default|string|1.6.0 -kyuubi.session.engine.idle.timeout|PT30M|engine timeout, the engine will self-terminate when it's not accessed for this duration. 0 or negative means not to self-terminate.|duration|1.0.0 -kyuubi.session.engine.initialize.timeout|PT3M|Timeout for starting the background engine, e.g. SparkSQLEngine.|duration|1.0.0 -kyuubi.session.engine.launch.async|true|When opening kyuubi session, whether to launch backend engine asynchronously. When true, the Kyuubi server will set up the connection with the client without delay as the backend engine will be created asynchronously.|boolean|1.4.0 -kyuubi.session.engine.log.timeout|PT24H|If we use Spark as the engine then the session submit log is the console output of spark-submit. We will retain the session submit log until over the config value.|duration|1.1.0 -kyuubi.session.engine.login.timeout|PT15S|The timeout of creating the connection to remote sql query engine|duration|1.0.0 -kyuubi.session.engine.open.max.attempts|9|The number of times an open engine will retry when encountering a special error.|int|1.7.0 -kyuubi.session.engine.open.retry.wait|PT10S|How long to wait before retrying to open engine after a failure.|duration|1.7.0 -kyuubi.session.engine.share.level|USER|(deprecated) - Using kyuubi.engine.share.level instead|string|1.0.0 -kyuubi.session.engine.spark.main.resource|<undefined>|The package used to create Spark SQL engine remote application. If it is undefined, Kyuubi will use the default|string|1.0.0 -kyuubi.session.engine.spark.max.lifetime|PT0S|Max lifetime for spark engine, the engine will self-terminate when it reaches the end of life. 0 or negative means not to self-terminate.|duration|1.6.0 -kyuubi.session.engine.spark.progress.timeFormat|yyyy-MM-dd HH:mm:ss.SSS|The time format of the progress bar|string|1.6.0 -kyuubi.session.engine.spark.progress.update.interval|PT1S|Update period of progress bar.|duration|1.6.0 -kyuubi.session.engine.spark.showProgress|false|When true, show the progress bar in the spark engine log.|boolean|1.6.0 -kyuubi.session.engine.startup.error.max.size|8192|During engine bootstrapping, if error occurs, using this config to limit the length error message(characters).|int|1.1.0 -kyuubi.session.engine.startup.maxLogLines|10|The maximum number of engine log lines when errors occur during engine startup phase. Note that this max lines is for client-side to help track engine startup issue.|int|1.4.0 -kyuubi.session.engine.startup.waitCompletion|true|Whether to wait for completion after engine starts. If false, the startup process will be destroyed after the engine is started. Note that only use it when the driver is not running locally, such as yarn-cluster mode; Otherwise, the engine will be killed.|boolean|1.5.0 -kyuubi.session.engine.trino.connection.catalog|<undefined>|The default catalog that trino engine will connect to|string|1.5.0 -kyuubi.session.engine.trino.connection.url|<undefined>|The server url that trino engine will connect to|string|1.5.0 -kyuubi.session.engine.trino.main.resource|<undefined>|The package used to create Trino engine remote job. If it is undefined, Kyuubi will use the default|string|1.5.0 -kyuubi.session.engine.trino.showProgress|true|When true, show the progress bar and final info in the trino engine log.|boolean|1.6.0 -kyuubi.session.engine.trino.showProgress.debug|false|When true, show the progress debug info in the trino engine log.|boolean|1.6.0 -kyuubi.session.group.provider|hadoop|A group provider plugin for Kyuubi Server. This plugin can provide primary group and groups information for different user or session configs. This config value should be a class which is a child of 'org.apache.kyuubi.plugin.GroupProvider' which has zero-arg constructor. Kyuubi provides the following built-in implementations:
    • hadoop: delegate the user group mapping to hadoop UserGroupInformation.
    • |string|1.7.0 -kyuubi.session.idle.timeout|PT6H|session idle timeout, it will be closed when it's not accessed for this duration|duration|1.2.0 -kyuubi.session.local.dir.allow.list||The local dir list that are allowed to access by the kyuubi session application. User might set some parameters such as `spark.files` and it will upload some local files when launching the kyuubi engine, if the local dir allow list is defined, kyuubi will check whether the path to upload is in the allow list. Note that, if it is empty, there is no limitation for that and please use absolute path list.|seq|1.6.0 -kyuubi.session.name|<undefined>|A human readable name of session and we use empty string by default. This name will be recorded in event. Note that, we only apply this value from session conf.|string|1.4.0 -kyuubi.session.timeout|PT6H|(deprecated)session timeout, it will be closed when it's not accessed for this duration|duration|1.0.0 -kyuubi.session.user.sign.enabled|false|Whether to verify the integrity of session user name on engine side, e.g. Authz plugin in Spark.|boolean|1.7.0 - +| Key | Default | Meaning | Type | Since | +|------------------------------------------------------|-------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------|-------| +| kyuubi.session.check.interval | PT5M | The check interval for session timeout. | duration | 1.0.0 | +| kyuubi.session.close.on.disconnect | true | Session will be closed when client disconnects from kyuubi gateway. Set this to false to have session outlive its parent connection. | boolean | 1.8.0 | +| kyuubi.session.conf.advisor | <undefined> | A config advisor plugin for Kyuubi Server. This plugin can provide some custom configs for different users or session configs and overwrite the session configs before opening a new session. This config value should be a subclass of `org.apache.kyuubi.plugin.SessionConfAdvisor` which has a zero-arg constructor. | string | 1.5.0 | +| kyuubi.session.conf.file.reload.interval | PT10M | When `FileSessionConfAdvisor` is used, this configuration defines the expired time of `$KYUUBI_CONF_DIR/kyuubi-session-.conf` in the cache. After exceeding this value, the file will be reloaded. | duration | 1.7.0 | +| kyuubi.session.conf.ignore.list || A comma-separated list of ignored keys. If the client connection contains any of them, the key and the corresponding value will be removed silently during engine bootstrap and connection setup. Note that this rule is for server-side protection defined via administrators to prevent some essential configs from tampering but will not forbid users to set dynamic configurations via SET syntax. | seq | 1.2.0 | +| kyuubi.session.conf.profile | <undefined> | Specify a profile to load session-level configurations from `$KYUUBI_CONF_DIR/kyuubi-session-.conf`. This configuration will be ignored if the file does not exist. This configuration only takes effect when `kyuubi.session.conf.advisor` is set as `org.apache.kyuubi.session.FileSessionConfAdvisor`. | string | 1.7.0 | +| kyuubi.session.conf.restrict.list || A comma-separated list of restricted keys. If the client connection contains any of them, the connection will be rejected explicitly during engine bootstrap and connection setup. Note that this rule is for server-side protection defined via administrators to prevent some essential configs from tampering but will not forbid users to set dynamic configurations via SET syntax. | seq | 1.2.0 | +| kyuubi.session.engine.alive.probe.enabled | false | Whether to enable the engine alive probe, it true, we will create a companion thrift client that keeps sending simple requests to check whether the engine is alive. | boolean | 1.6.0 | +| kyuubi.session.engine.alive.probe.interval | PT10S | The interval for engine alive probe. | duration | 1.6.0 | +| kyuubi.session.engine.alive.timeout | PT2M | The timeout for engine alive. If there is no alive probe success in the last timeout window, the engine will be marked as no-alive. | duration | 1.6.0 | +| kyuubi.session.engine.check.interval | PT1M | The check interval for engine timeout | duration | 1.0.0 | +| kyuubi.session.engine.flink.main.resource | <undefined> | The package used to create Flink SQL engine remote job. If it is undefined, Kyuubi will use the default | string | 1.4.0 | +| kyuubi.session.engine.flink.max.rows | 1000000 | Max rows of Flink query results. For batch queries, rows exceeding the limit would be ignored. For streaming queries, the query would be canceled if the limit is reached. | int | 1.5.0 | +| kyuubi.session.engine.hive.main.resource | <undefined> | The package used to create Hive engine remote job. If it is undefined, Kyuubi will use the default | string | 1.6.0 | +| kyuubi.session.engine.idle.timeout | PT30M | engine timeout, the engine will self-terminate when it's not accessed for this duration. 0 or negative means not to self-terminate. | duration | 1.0.0 | +| kyuubi.session.engine.initialize.timeout | PT3M | Timeout for starting the background engine, e.g. SparkSQLEngine. | duration | 1.0.0 | +| kyuubi.session.engine.launch.async | true | When opening kyuubi session, whether to launch the backend engine asynchronously. When true, the Kyuubi server will set up the connection with the client without delay as the backend engine will be created asynchronously. | boolean | 1.4.0 | +| kyuubi.session.engine.log.timeout | PT24H | If we use Spark as the engine then the session submit log is the console output of spark-submit. We will retain the session submit log until over the config value. | duration | 1.1.0 | +| kyuubi.session.engine.login.timeout | PT15S | The timeout of creating the connection to remote sql query engine | duration | 1.0.0 | +| kyuubi.session.engine.open.max.attempts | 9 | The number of times an open engine will retry when encountering a special error. | int | 1.7.0 | +| kyuubi.session.engine.open.retry.wait | PT10S | How long to wait before retrying to open the engine after failure. | duration | 1.7.0 | +| kyuubi.session.engine.share.level | USER | (deprecated) - Using kyuubi.engine.share.level instead | string | 1.0.0 | +| kyuubi.session.engine.spark.main.resource | <undefined> | The package used to create Spark SQL engine remote application. If it is undefined, Kyuubi will use the default | string | 1.0.0 | +| kyuubi.session.engine.spark.max.lifetime | PT0S | Max lifetime for Spark engine, the engine will self-terminate when it reaches the end of life. 0 or negative means not to self-terminate. | duration | 1.6.0 | +| kyuubi.session.engine.spark.progress.timeFormat | yyyy-MM-dd HH:mm:ss.SSS | The time format of the progress bar | string | 1.6.0 | +| kyuubi.session.engine.spark.progress.update.interval | PT1S | Update period of progress bar. | duration | 1.6.0 | +| kyuubi.session.engine.spark.showProgress | false | When true, show the progress bar in the Spark's engine log. | boolean | 1.6.0 | +| kyuubi.session.engine.startup.error.max.size | 8192 | During engine bootstrapping, if anderror occurs, using this config to limit the length of error message(characters). | int | 1.1.0 | +| kyuubi.session.engine.startup.maxLogLines | 10 | The maximum number of engine log lines when errors occur during the engine startup phase. Note that this config effects on client-side to help track engine startup issues. | int | 1.4.0 | +| kyuubi.session.engine.startup.waitCompletion | true | Whether to wait for completion after the engine starts. If false, the startup process will be destroyed after the engine is started. Note that only use it when the driver is not running locally, such as in yarn-cluster mode; Otherwise, the engine will be killed. | boolean | 1.5.0 | +| kyuubi.session.engine.trino.connection.catalog | <undefined> | The default catalog that Trino engine will connect to | string | 1.5.0 | +| kyuubi.session.engine.trino.connection.url | <undefined> | The server url that Trino engine will connect to | string | 1.5.0 | +| kyuubi.session.engine.trino.main.resource | <undefined> | The package used to create Trino engine remote job. If it is undefined, Kyuubi will use the default | string | 1.5.0 | +| kyuubi.session.engine.trino.showProgress | true | When true, show the progress bar and final info in the Trino engine log. | boolean | 1.6.0 | +| kyuubi.session.engine.trino.showProgress.debug | false | When true, show the progress debug info in the Trino engine log. | boolean | 1.6.0 | +| kyuubi.session.group.provider | hadoop | A group provider plugin for Kyuubi Server. This plugin can provide primary group and groups information for different users or session configs. This config value should be a subclass of `org.apache.kyuubi.plugin.GroupProvider` which has a zero-arg constructor. Kyuubi provides the following built-in implementations:
    • hadoop: delegate the user group mapping to hadoop UserGroupInformation.
    • | string | 1.7.0 | +| kyuubi.session.idle.timeout | PT6H | session idle timeout, it will be closed when it's not accessed for this duration | duration | 1.2.0 | +| kyuubi.session.local.dir.allow.list || The local dir list that are allowed to access by the kyuubi session application. End-users might set some parameters such as `spark.files` and it will upload some local files when launching the kyuubi engine, if the local dir allow list is defined, kyuubi will check whether the path to upload is in the allow list. Note that, if it is empty, there is no limitation for that. And please use absolute paths. | seq | 1.6.0 | +| kyuubi.session.name | <undefined> | A human readable name of the session and we use empty string by default. This name will be recorded in the event. Note that, we only apply this value from session conf. | string | 1.4.0 | +| kyuubi.session.timeout | PT6H | (deprecated)session timeout, it will be closed when it's not accessed for this duration | duration | 1.0.0 | +| kyuubi.session.user.sign.enabled | false | Whether to verify the integrity of session user name on the engine side, e.g. Authz plugin in Spark. | boolean | 1.7.0 | ### Spnego -Key | Default | Meaning | Type | Since ---- | --- | --- | --- | --- -kyuubi.spnego.keytab|<undefined>|Keytab file for SPNego principal|string|1.6.0 -kyuubi.spnego.principal|<undefined>|SPNego service principal, typical value would look like HTTP/_HOST@EXAMPLE.COM. SPNego service principal would be used when restful Kerberos security is enabled. This needs to be set only if SPNEGO is to be used in authentication.|string|1.6.0 - +| Key | Default | Meaning | Type | Since | +|-------------------------|-------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------|-------| +| kyuubi.spnego.keytab | <undefined> | Keytab file for SPNego principal | string | 1.6.0 | +| kyuubi.spnego.principal | <undefined> | SPNego service principal, typical value would look like HTTP/_HOST@EXAMPLE.COM. SPNego service principal would be used when restful Kerberos security is enabled. This needs to be set only if SPNEGO is to be used in authentication. | string | 1.6.0 | ### Zookeeper -Key | Default | Meaning | Type | Since ---- | --- | --- | --- | --- -kyuubi.zookeeper.embedded.client.port|2181|clientPort for the embedded zookeeper server to listen for client connections, a client here could be Kyuubi server, engine and JDBC client|int|1.2.0 -kyuubi.zookeeper.embedded.client.port.address|<undefined>|clientPortAddress for the embedded zookeeper server to|string|1.2.0 -kyuubi.zookeeper.embedded.data.dir|embedded_zookeeper|dataDir for the embedded zookeeper server where stores the in-memory database snapshots and, unless specified otherwise, the transaction log of updates to the database.|string|1.2.0 -kyuubi.zookeeper.embedded.data.log.dir|embedded_zookeeper|dataLogDir for the embedded zookeeper server where writes the transaction log .|string|1.2.0 -kyuubi.zookeeper.embedded.directory|embedded_zookeeper|The temporary directory for the embedded zookeeper server|string|1.0.0 -kyuubi.zookeeper.embedded.max.client.connections|120|maxClientCnxns for the embedded zookeeper server to limits the number of concurrent connections of a single client identified by IP address|int|1.2.0 -kyuubi.zookeeper.embedded.max.session.timeout|60000|maxSessionTimeout in milliseconds for the embedded zookeeper server will allow the client to negotiate. Defaults to 20 times the tickTime|int|1.2.0 -kyuubi.zookeeper.embedded.min.session.timeout|6000|minSessionTimeout in milliseconds for the embedded zookeeper server will allow the client to negotiate. Defaults to 2 times the tickTime|int|1.2.0 -kyuubi.zookeeper.embedded.port|2181|The port of the embedded zookeeper server|int|1.0.0 -kyuubi.zookeeper.embedded.tick.time|3000|tickTime in milliseconds for the embedded zookeeper server|int|1.2.0 +| Key | Default | Meaning | Type | Since | +|--------------------------------------------------|--------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------|-------| +| kyuubi.zookeeper.embedded.client.port | 2181 | clientPort for the embedded ZooKeeper server to listen for client connections, a client here could be Kyuubi server, engine, and JDBC client | int | 1.2.0 | +| kyuubi.zookeeper.embedded.client.port.address | <undefined> | clientPortAddress for the embedded ZooKeeper server to | string | 1.2.0 | +| kyuubi.zookeeper.embedded.data.dir | embedded_zookeeper | dataDir for the embedded zookeeper server where stores the in-memory database snapshots and, unless specified otherwise, the transaction log of updates to the database. | string | 1.2.0 | +| kyuubi.zookeeper.embedded.data.log.dir | embedded_zookeeper | dataLogDir for the embedded ZooKeeper server where writes the transaction log . | string | 1.2.0 | +| kyuubi.zookeeper.embedded.directory | embedded_zookeeper | The temporary directory for the embedded ZooKeeper server | string | 1.0.0 | +| kyuubi.zookeeper.embedded.max.client.connections | 120 | maxClientCnxns for the embedded ZooKeeper server to limit the number of concurrent connections of a single client identified by IP address | int | 1.2.0 | +| kyuubi.zookeeper.embedded.max.session.timeout | 60000 | maxSessionTimeout in milliseconds for the embedded ZooKeeper server will allow the client to negotiate. Defaults to 20 times the tickTime | int | 1.2.0 | +| kyuubi.zookeeper.embedded.min.session.timeout | 6000 | minSessionTimeout in milliseconds for the embedded ZooKeeper server will allow the client to negotiate. Defaults to 2 times the tickTime | int | 1.2.0 | +| kyuubi.zookeeper.embedded.port | 2181 | The port of the embedded ZooKeeper server | int | 1.0.0 | +| kyuubi.zookeeper.embedded.tick.time | 3000 | tickTime in milliseconds for the embedded ZooKeeper server | int | 1.2.0 | ## Spark Configurations ### Via spark-defaults.conf -Setting them in `$SPARK_HOME/conf/spark-defaults.conf` supplies with default values for SQL engine application. Available properties can be found at Spark official online documentation for [Spark Configurations](http://spark.apache.org/docs/latest/configuration.html) +Setting them in `$SPARK_HOME/conf/spark-defaults.conf` supplies with default values for SQL engine application. Available properties can be found at Spark official online documentation for [Spark Configurations](https://spark.apache.org/docs/latest/configuration.html) ### Via kyuubi-defaults.conf @@ -553,16 +460,13 @@ Setting them in `$KYUUBI_HOME/conf/kyuubi-defaults.conf` supplies with default v Setting them in the JDBC Connection URL supplies session-specific for each SQL engine. For example: ```jdbc:hive2://localhost:10009/default;#spark.sql.shuffle.partitions=2;spark.executor.memory=5g``` - **Runtime SQL Configuration** - - - For [Runtime SQL Configurations](http://spark.apache.org/docs/latest/configuration.html#runtime-sql-configuration), they will take affect every time - + - For [Runtime SQL Configurations](https://spark.apache.org/docs/latest/configuration.html#runtime-sql-configuration), they will take affect every time - **Static SQL and Spark Core Configuration** - - - For [Static SQL Configurations](http://spark.apache.org/docs/latest/configuration.html#static-sql-configuration) and other spark core configs, e.g. `spark.executor.memory`, they will take affect if there is no existing SQL engine application. Otherwise, they will just be ignored + - For [Static SQL Configurations](https://spark.apache.org/docs/latest/configuration.html#static-sql-configuration) and other spark core configs, e.g. `spark.executor.memory`, they will take effect if there is no existing SQL engine application. Otherwise, they will just be ignored ### Via SET Syntax -Please refer to the Spark official online documentation for [SET Command](http://spark.apache.org/docs/latest/sql-ref-syntax-aux-conf-mgmt-set.html) +Please refer to the Spark official online documentation for [SET Command](https://spark.apache.org/docs/latest/sql-ref-syntax-aux-conf-mgmt-set.html) ## Flink Configurations @@ -575,6 +479,7 @@ Setting them in `$FLINK_HOME/conf/flink-conf.yaml` supplies with default values Setting them in `$KYUUBI_HOME/conf/kyuubi-defaults.conf` supplies with default values for SQL engine application too. You can use properties with the additional prefix `flink.` to override settings in `$FLINK_HOME/conf/flink-conf.yaml`. For example: + ``` flink.parallelism.default 2 flink.taskmanager.memory.process.size 5g @@ -592,86 +497,23 @@ Please refer to the Flink official online documentation for [SET Statements](htt ## Logging -Kyuubi uses [log4j](https://logging.apache.org/log4j/2.x/) for logging. You can configure it using `$KYUUBI_HOME/conf/log4j2.xml`. -```bash - - - - - - - - rest-audit.log - rest-audit-%d{yyyy-MM-dd}-%i.log - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -``` +Kyuubi uses [log4j](https://logging.apache.org/log4j/2.x/) for logging. You can configure it using `$KYUUBI_HOME/conf/log4j2.xml`, see `$KYUUBI_HOME/conf/log4j2.xml.template` as an example. ## Other Configurations ### Hadoop Configurations -Specifying `HADOOP_CONF_DIR` to the directory contains hadoop configuration files or treating them as Spark properties with a `spark.hadoop.` prefix. Please refer to the Spark official online documentation for [Inheriting Hadoop Cluster Configuration](http://spark.apache.org/docs/latest/configuration.html#inheriting-hadoop-cluster-configuration). Also, please refer to the [Apache Hadoop](http://hadoop.apache.org)'s online documentation for an overview on how to configure Hadoop. +Specifying `HADOOP_CONF_DIR` to the directory containing Hadoop configuration files or treating them as Spark properties with a `spark.hadoop.` prefix. Please refer to the Spark official online documentation for [Inheriting Hadoop Cluster Configuration](https://spark.apache.org/docs/latest/configuration.html#inheriting-hadoop-cluster-configuration). Also, please refer to the [Apache Hadoop](https://hadoop.apache.org)'s online documentation for an overview on how to configure Hadoop. ### Hive Configurations -These configurations are used for SQL engine application to talk to Hive MetaStore and could be configured in a `hive-site.xml`. Placed it in `$SPARK_HOME/conf` directory, or treating them as Spark properties with a `spark.hadoop.` prefix. +These configurations are used for SQL engine application to talk to Hive MetaStore and could be configured in a `hive-site.xml`. Placed it in `$SPARK_HOME/conf` directory, or treat them as Spark properties with a `spark.hadoop.` prefix. ## User Defaults In Kyuubi, we can configure user default settings to meet separate needs. These user defaults override system defaults, but will be overridden by those from [JDBC Connection URL](#via-jdbc-connection-url) or [Set Command](#via-set-syntax) if could be. They will take effect when creating the SQL engine application ONLY. User default settings are in the form of `___{username}___.{config key}`. There are three continuous underscores(`_`) at both sides of the `username` and a dot(`.`) that separates the config key and the prefix. For example: + ```bash # For system defaults spark.master=local diff --git a/docs/deployment/spark/aqe.md b/docs/deployment/spark/aqe.md index f85fcbf2037..90cc5aff84c 100644 --- a/docs/deployment/spark/aqe.md +++ b/docs/deployment/spark/aqe.md @@ -1,20 +1,19 @@ - +- Licensed to the Apache Software Foundation (ASF) under one or more +- contributor license agreements. See the NOTICE file distributed with +- this work for additional information regarding copyright ownership. +- The ASF licenses this file to You under the Apache License, Version 2.0 +- (the "License"); you may not use this file except in compliance with +- the License. You may obtain a copy of the License at +- +- http://www.apache.org/licenses/LICENSE-2.0 +- +- Unless required by applicable law or agreed to in writing, software +- distributed under the License is distributed on an "AS IS" BASIS, +- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- See the License for the specific language governing permissions and +- limitations under the License. +--> # How To Use Spark Adaptive Query Execution (AQE) in Kyuubi @@ -197,7 +196,6 @@ By default, if there are only less than 20% partitions of the dataset contain da This optimization rule detects and converts a Join to an empty LocalRelation. - #### Disabling the Hidden Features We can exclude some of the AQE additional rules if performance regression or bug occurs. For example, @@ -210,7 +208,6 @@ SET spark.sql.adaptive.optimizer.excludedRules=org.apache.spark.sql.execution.ad Kyuubi is a long-running service to make it easier for end-users to use Spark SQL without having much of Spark's basic knowledge. It is essential to have a basic configuration that works for most scenarios on the server-side. - ### Setting Default Configurations [Configuring by `spark-defaults.conf`](settings.html#via-spark-defaults-conf) at the engine side is the best way to set up Kyuubi with AQE. All engines will be instantiated with AQE enabled. @@ -234,7 +231,9 @@ spark.sql.adaptive.nonEmptyPartitionRatioForBroadcastJoin=0.2 spark.sql.adaptive.optimizer.excludedRules spark.sql.autoBroadcastJoinThreshold=-1 ``` + #### Tips + Turn on AQE by default can significantly improve the user experience. Other sub-features are all enabled. `advisoryPartitionSizeInBytes` is targeting the HDFS block size @@ -246,7 +245,6 @@ Since AQE requires at least one shuffle, ideally, we need to set `autoBroadcastJ All AQE related configurations are runtime changeable, which means that it can still modify some specific configs by `SET` syntaxes for each SQL query with more precise control on the client-side. - ## Spark Known issues [SPARK-33933: Broadcast timeout happened unexpectedly in AQE](https://issues.apache.org/jira/browse/SPARK-33933) @@ -262,3 +260,4 @@ For other potential problems that may be found in the AQE features of Spark, you 3. [SPARK-31412: New Adaptive Query Execution in Spark SQL](https://issues.apache.org/jira/browse/SPARK-31412) 4. [SPARK-28560: Optimize shuffle reader to local shuffle reader when smj converted to bhj in adaptive execution](https://issues.apache.org/jira/browse/SPARK-28560) 5. [Coalesce and Repartition Hint for SQL Queries](https://issues.apache.org/jira/browse/SPARK-24940) + diff --git a/docs/deployment/spark/dynamic_allocation.md b/docs/deployment/spark/dynamic_allocation.md index 7b35e4bd998..b177b63c365 100644 --- a/docs/deployment/spark/dynamic_allocation.md +++ b/docs/deployment/spark/dynamic_allocation.md @@ -1,24 +1,22 @@ - +- Licensed to the Apache Software Foundation (ASF) under one or more +- contributor license agreements. See the NOTICE file distributed with +- this work for additional information regarding copyright ownership. +- The ASF licenses this file to You under the Apache License, Version 2.0 +- (the "License"); you may not use this file except in compliance with +- the License. You may obtain a copy of the License at +- +- http://www.apache.org/licenses/LICENSE-2.0 +- +- Unless required by applicable law or agreed to in writing, software +- distributed under the License is distributed on an "AS IS" BASIS, +- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- See the License for the specific language governing permissions and +- limitations under the License. +--> # How To Use Spark Dynamic Resource Allocation (DRA) in Kyuubi - When we adopt Kyuubi in a production environment, we always want to use the environment's computing resources more cost-effectively and efficiently. Cluster managers such as K8S and Yarn manage the cluster compute resources, @@ -42,7 +40,6 @@ On the one hand, we need to rely on the resource manager's capabilities for effi resource isolation, and sharing. On the other hand, we need to enable Spark's DRA feature for the engines' executors' elastic scaling. - ## The Basics of Dynamic Resource Allocation Spark provides a mechanism to dynamically adjust the application resources based on the workload, which means that an application may give resources back to the cluster if they are no longer used and request them again later when there is demand. @@ -58,7 +55,6 @@ When the engine has executors that become idle, the executors are released, and the occupied resources are given back to the cluster manager. Then other engines or other applications run in the same queue could acquire the resources. - ## How to Enable Dynamic Resource Allocation The prerequisite for enabling this feature is for downstream stages to have proper access to shuffle data, even if the executors that generated the data are recycled. @@ -78,7 +74,6 @@ spark.shuffle.service.enabled=true Another thing to be sure of is that ```spark.shuffle.service.port``` should be configured to point to the port on which the ESS is running. - ### Dynamic Allocation w/o External Shuffle Service Implementations of the ESS feature are cluster manager dependent. Yarn, for instance, where the ESS needs to be deployed cluster-widely and is actually running in the Yarn's `NodeManager` component. Nevertheless, if run Kyuubi's engines on Kubernetes, the ESS is not an option yet. @@ -104,7 +99,6 @@ On the other hand, the ```maxExecutors``` determines the upper bound executors o The following Spark configurations consist of sizing for the DRA. - ``` spark.dynamicAllocation.minExecutors=10 spark.dynamicAllocation.maxExecutors=500 @@ -132,12 +126,10 @@ By default, the dynamic allocation will request enough executors to maximize the - While this minimizes the latency of the job, but with small tasks, the default behavior can waste many resources due to executor allocation overhead, as some executors might not even do any work. In this case, we can adjust ```spark.dynamicAllocation.executorAllocationRatio``` a bit lower to reduce the number of executors w.r.t. full parallelism. For instance, 0.5 will divide the target number of executors by 2. -
      ![](../../imgs/spark/dra_executor_add_ratio.png) @@ -153,6 +145,7 @@ After finish one task, Spark Driver will schedule a new task for the executor w
      If one executor reached the maximum timeout, it will be removed. + ```properties spark.dynamicAllocation.executorIdleTimeout=60s spark.dynamicAllocation.cachedExecutorIdleTimeout=infinity @@ -164,7 +157,6 @@ spark.dynamicAllocation.cachedExecutorIdleTimeout=infinity - If the DRA finds there have been pending tasks backlogged for more than the timeouts, new executors will be requested, controlled by the following configs. ```properties @@ -176,7 +168,6 @@ spark.dynamicAllocation.sustainedSchedulerBacklogTimeout=1s Kyuubi is a long-running service to make it easier for end-users to use Spark SQL without having much of Spark's basic knowledge. It is essential to have a basic configuration for resource management that works for most scenarios on the server-side. - ### Setting Default Configurations [Configuring by `spark-defaults.conf`](settings.html#via-spark-defaults-conf) at the engine side is the best way to set up Kyuubi with DRA. All engines will be instantiated with DRA enabled. @@ -185,7 +176,7 @@ Here is a config setting that we use in our platform when deploying Kyuubi. ```properties spark.dynamicAllocation.enabled=true -##false if perfer shuffle tracking than ESS +##false if prefer shuffle tracking than ESS spark.shuffle.service.enabled=true spark.dynamicAllocation.initialExecutors=10 spark.dynamicAllocation.minExecutors=10 @@ -193,7 +184,7 @@ spark.dynamicAllocation.maxExecutors=500 spark.dynamicAllocation.executorAllocationRatio=0.5 spark.dynamicAllocation.executorIdleTimeout=60s spark.dynamicAllocation.cachedExecutorIdleTimeout=30min -# true if perfer shuffle tracking than ESS +# true if prefer shuffle tracking than ESS spark.dynamicAllocation.shuffleTracking.enabled=false spark.dynamicAllocation.shuffleTracking.timeout=30min spark.dynamicAllocation.schedulerBacklogTimeout=1s @@ -204,6 +195,7 @@ spark.cleaner.periodicGC.interval=5min Note that, ```spark.cleaner.periodicGC.interval=5min``` is useful here when ```spark.dynamicAllocation.shuffleTracking.enabled``` is enabled, as we can tell Spark to be more active for shuffle data GC. ### Setting User Default Settings + On the server-side, the workloads for different users might be different. Then we can set different defaults for them via the [User Defaults](../settings.html#user-defaults) in ```$KYUUBI_HOME/conf/kyuubi-defaults.conf``` @@ -214,11 +206,12 @@ ___kent___.spark.dynamicAllocation.maxExecutors=20 # For a user named bob ___bob___.spark.dynamicAllocation.maxExecutors=600 ``` + In this case, the user named `kent` can only use 20 executors for his engines, but `bob` can use 600 executors for better performance or handle heavy workloads. ### Dynamically Setting -All AQE related configurations are static of Spark core and unchangeable by `SET` syntaxes before each SQL query. For example, +All AQE related configurations are static of Spark core and unchangeable by `SET` commands before each SQL query. For example, ```sql SET spark.dynamicAllocation.maxExecutors=33; @@ -229,9 +222,9 @@ For the above case, the value - 33 will not affect as Spark does not support cha Instead, end-users can set them via [JDBC Connection URL](../settings.html#via-jdbc-connection-url) for some specific cases. - ## References 1. [Spark Official Online Document: Dynamic Resource Allocation](https://spark.apache.org/docs/latest/job-scheduling.html#dynamic-resource-allocation) 2. [Spark Official Online Document: Dynamic Resource Allocation Configurations](https://spark.apache.org/docs/latest/configuration.html#dynamic-allocation) 3. [SPARK-27963: Allow dynamic allocation without an external shuffle service](https://issues.apache.org/jira/browse/SPARK-27963) + diff --git a/docs/deployment/spark/incremental_collection.md b/docs/deployment/spark/incremental_collection.md index 6883cdd0027..28fd4aa7807 100644 --- a/docs/deployment/spark/incremental_collection.md +++ b/docs/deployment/spark/incremental_collection.md @@ -1,20 +1,19 @@ - +- Licensed to the Apache Software Foundation (ASF) under one or more +- contributor license agreements. See the NOTICE file distributed with +- this work for additional information regarding copyright ownership. +- The ASF licenses this file to You under the Apache License, Version 2.0 +- (the "License"); you may not use this file except in compliance with +- the License. You may obtain a copy of the License at +- +- http://www.apache.org/licenses/LICENSE-2.0 +- +- Unless required by applicable law or agreed to in writing, software +- distributed under the License is distributed on an "AS IS" BASIS, +- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- See the License for the specific language governing permissions and +- limitations under the License. +--> # Solution for Big Result Sets diff --git a/docs/develop_tools/build_document.md b/docs/develop_tools/build_document.md index c3c310db309..0be5a180705 100644 --- a/docs/develop_tools/build_document.md +++ b/docs/develop_tools/build_document.md @@ -1,20 +1,19 @@ - +- Licensed to the Apache Software Foundation (ASF) under one or more +- contributor license agreements. See the NOTICE file distributed with +- this work for additional information regarding copyright ownership. +- The ASF licenses this file to You under the Apache License, Version 2.0 +- (the "License"); you may not use this file except in compliance with +- the License. You may obtain a copy of the License at +- +- http://www.apache.org/licenses/LICENSE-2.0 +- +- Unless required by applicable law or agreed to in writing, software +- distributed under the License is distributed on an "AS IS" BASIS, +- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- See the License for the specific language governing permissions and +- limitations under the License. +--> # Building Kyuubi Documentation @@ -59,10 +58,13 @@ pip install -r requirements.txt Make sure you are in the `$KYUUBI_SOURCE_PATH/docs` directory. linux & macos + ```bash make html ``` + windows + ```bash make.bat html ``` diff --git a/docs/develop_tools/building.md b/docs/develop_tools/building.md index 99fdd47148e..d4582dc8dae 100644 --- a/docs/develop_tools/building.md +++ b/docs/develop_tools/building.md @@ -1,26 +1,25 @@ - +- Licensed to the Apache Software Foundation (ASF) under one or more +- contributor license agreements. See the NOTICE file distributed with +- this work for additional information regarding copyright ownership. +- The ASF licenses this file to You under the Apache License, Version 2.0 +- (the "License"); you may not use this file except in compliance with +- the License. You may obtain a copy of the License at +- +- http://www.apache.org/licenses/LICENSE-2.0 +- +- Unless required by applicable law or agreed to in writing, software +- distributed under the License is distributed on an "AS IS" BASIS, +- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- See the License for the specific language governing permissions and +- limitations under the License. +--> # Building Kyuubi ## Building Kyuubi with Apache Maven -**Kyuubi** is built based on [Apache Maven](http://maven.apache.org), +**Kyuubi** is built based on [Apache Maven](https://maven.apache.org), ```bash ./build/mvn clean package -DskipTests @@ -62,12 +61,11 @@ mvn clean install -pl '!dev/kyuubi-codecov,!kyuubi-assembly' -DskipTests Since v1.1.0, Kyuubi support building with different Spark profiles, -Profile | Default | Since ---- | --- | --- --Pspark-3.1 | No | 1.1.0 --Pspark-3.2 | No | 1.4.0 --Pspark-3.3 | Yes | 1.6.0 - +| Profile | Default | Since | +|-------------|---------|-------| +| -Pspark-3.1 | No | 1.1.0 | +| -Pspark-3.2 | No | 1.4.0 | +| -Pspark-3.3 | Yes | 1.6.0 | ## Building with Apache dlcdn site @@ -81,6 +79,15 @@ For example, build/mvn clean package -Pmirror-cdn ``` -The profile migrates your download repo to the Apache offically suggested site - https://dlcdn.apache.org. +The profile migrates your download repo to the Apache officially suggested site - https://dlcdn.apache.org. Note that, this site only holds the latest versions of Apache releases. You may fail if the specific version defined by `spark.version` or `flink.version` is overdue. + +## Building with the `fast` profile + +The `fast` profile helps to significantly reduce build time, which is useful for development or compilation validation, by skipping running the tests, code style checks, building scaladoc, enforcer rules and downloading engine archives used for tests. + +```bash +build/mvn clean package -Pfast +``` + diff --git a/docs/develop_tools/debugging.md b/docs/develop_tools/debugging.md index 90ebd58f67a..faf7173e427 100644 --- a/docs/develop_tools/debugging.md +++ b/docs/develop_tools/debugging.md @@ -1,20 +1,19 @@ - +- Licensed to the Apache Software Foundation (ASF) under one or more +- contributor license agreements. See the NOTICE file distributed with +- this work for additional information regarding copyright ownership. +- The ASF licenses this file to You under the Apache License, Version 2.0 +- (the "License"); you may not use this file except in compliance with +- the License. You may obtain a copy of the License at +- +- http://www.apache.org/licenses/LICENSE-2.0 +- +- Unless required by applicable law or agreed to in writing, software +- distributed under the License is distributed on an "AS IS" BASIS, +- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- See the License for the specific language governing permissions and +- limitations under the License. +--> # Debugging Kyuubi @@ -24,15 +23,16 @@ with your favorite IDE tool, e.g. IntelliJ IDEA. ## Debugging Server We can configure the JDWP agent in `KYUUBI_JAVA_OPTS` for debugging. - - + For example, + ```bash KYUUBI_JAVA_OPTS=-agentlib:jdwp=transport=dt_socket,server=y,suspend=y,address=5005 \ bin/kyuubi start ``` In the IDE, you set the corresponding parameters(host&port) in debug configurations, for example, +
      ![](../imgs/idea_debug.png) @@ -107,4 +107,5 @@ env.java.opts.historyserver -agentlib:jdwp=transport=dt_socket,server=y,suspen ```bash env.java.opts.client -agentlib:jdwp=transport=dt_socket,server=y,suspend=y,address=5005 -``` \ No newline at end of file +``` + diff --git a/docs/develop_tools/developer.md b/docs/develop_tools/developer.md index 5f69f4a1ba5..329e219de46 100644 --- a/docs/develop_tools/developer.md +++ b/docs/develop_tools/developer.md @@ -1,20 +1,19 @@ - +- Licensed to the Apache Software Foundation (ASF) under one or more +- contributor license agreements. See the NOTICE file distributed with +- this work for additional information regarding copyright ownership. +- The ASF licenses this file to You under the Apache License, Version 2.0 +- (the "License"); you may not use this file except in compliance with +- the License. You may obtain a copy of the License at +- +- http://www.apache.org/licenses/LICENSE-2.0 +- +- Unless required by applicable law or agreed to in writing, software +- distributed under the License is distributed on an "AS IS" BASIS, +- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- See the License for the specific language governing permissions and +- limitations under the License. +--> # Developer Tools @@ -45,7 +44,6 @@ You can run `build/dependency.sh` locally first to detect the potential dependen If the changes look expected, run `build/dependency.sh --replace` to update `dev/dependencyList` in your Pull request. - ## Format All Code Kyuubi uses [Spotless](https://github.com/diffplug/spotless/tree/main/plugin-maven) @@ -54,10 +52,9 @@ to format the Java and Scala code. You can run `dev/reformat` to format all Java and Scala code. - ## Append descriptions of new configurations to settings.md Kyuubi uses settings.md to explain available configurations. -You can run `KYUUBI_UPDATE=1 build/mvn clean install -Pflink-provided,spark-provided,hive-provided -DwildcardSuites=org.apache.kyuubi.config.AllKyuubiConfiguration` -to append descriptions of new configurations to settings.md. \ No newline at end of file +You can run `KYUUBI_UPDATE=1 build/mvn clean test -pl kyuubi-server -am -Pflink-provided,spark-provided,hive-provided -DwildcardSuites=org.apache.kyuubi.config.AllKyuubiConfiguration` +to append descriptions of new configurations to settings.md. diff --git a/docs/develop_tools/distribution.md b/docs/develop_tools/distribution.md index 680f4e212a7..217f0a4178d 100644 --- a/docs/develop_tools/distribution.md +++ b/docs/develop_tools/distribution.md @@ -1,20 +1,19 @@ - +- Licensed to the Apache Software Foundation (ASF) under one or more +- contributor license agreements. See the NOTICE file distributed with +- this work for additional information regarding copyright ownership. +- The ASF licenses this file to You under the Apache License, Version 2.0 +- (the "License"); you may not use this file except in compliance with +- the License. You may obtain a copy of the License at +- +- http://www.apache.org/licenses/LICENSE-2.0 +- +- Unless required by applicable law or agreed to in writing, software +- distributed under the License is distributed on an "AS IS" BASIS, +- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- See the License for the specific language governing permissions and +- limitations under the License. +--> # Building a Runnable Distribution @@ -27,15 +26,16 @@ For more information on usage, run `./build/dist --help` ./build/dist - Tool for making binary distributions of Kyuubi Usage: -+------------------------------------------------------------------------------------------------------+ -| ./build/dist [--name ] [--tgz] [--flink-provided] [--spark-provided] [--hive-provided] | -| [--mvn ] | -+------------------------------------------------------------------------------------------------------+ ++----------------------------------------------------------------------------------------------+ +| ./build/dist [--name ] [--tgz] [--web-ui] [--flink-provided] [--hive-provided] | +| [--spark-provided] [--mvn ] | ++----------------------------------------------------------------------------------------------+ name: - custom binary name, using project version if undefined tgz: - whether to make a whole bundled package +web-ui: - whether to include web ui flink-provided: - whether to make a package without Flink binary -spark-provided: - whether to make a package without Spark binary hive-provided: - whether to make a package without Hive binary +spark-provided: - whether to make a package without Spark binary mvn: - external maven executable location ``` @@ -47,7 +47,7 @@ For instance, This results in a Kyuubi distribution named `apache-kyuubi-{version}-bin-custom-name.tgz` for you. -If you are planing to deploy Kyuubi where `spark`/`flink`/`hive` is provided, in other word, it's not required to bundle spark/flink/hive binary, use +If you are planing to deploy Kyuubi where `spark`/`flink`/`hive` is provided, in other word, it's not required to bundle spark/flink/hive binary, use ```bash ./build/dist --tgz --spark-provided --flink-provided --hive-provided diff --git a/docs/develop_tools/idea_setup.md b/docs/develop_tools/idea_setup.md index bf5c44d54d9..96ba33bc434 100644 --- a/docs/develop_tools/idea_setup.md +++ b/docs/develop_tools/idea_setup.md @@ -1,20 +1,19 @@ - +- Licensed to the Apache Software Foundation (ASF) under one or more +- contributor license agreements. See the NOTICE file distributed with +- this work for additional information regarding copyright ownership. +- The ASF licenses this file to You under the Apache License, Version 2.0 +- (the "License"); you may not use this file except in compliance with +- the License. You may obtain a copy of the License at +- +- http://www.apache.org/licenses/LICENSE-2.0 +- +- Unless required by applicable law or agreed to in writing, software +- distributed under the License is distributed on an "AS IS" BASIS, +- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- See the License for the specific language governing permissions and +- limitations under the License. +--> # IntelliJ IDEA Setup Guide @@ -35,9 +34,9 @@ profile: to you under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at - + http://www.apache.org/licenses/LICENSE-2.0 - + Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/docs/develop_tools/testing.md b/docs/develop_tools/testing.md index deb984f45d7..3e63aa1a22f 100644 --- a/docs/develop_tools/testing.md +++ b/docs/develop_tools/testing.md @@ -1,24 +1,24 @@ +- Licensed to the Apache Software Foundation (ASF) under one or more +- contributor license agreements. See the NOTICE file distributed with +- this work for additional information regarding copyright ownership. +- The ASF licenses this file to You under the Apache License, Version 2.0 +- (the "License"); you may not use this file except in compliance with +- the License. You may obtain a copy of the License at +- +- http://www.apache.org/licenses/LICENSE-2.0 +- +- Unless required by applicable law or agreed to in writing, software +- distributed under the License is distributed on an "AS IS" BASIS, +- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- See the License for the specific language governing permissions and +- limitations under the License. +--> # Running Tests -**Kyuubi** can be tested based on [Apache Maven](http://maven.apache.org) and the ScalaTest Maven Plugin, -please refer to the [ScalaTest documentation](http://www.scalatest.org/user_guide/using_the_scalatest_maven_plugin), +**Kyuubi** can be tested based on [Apache Maven](https://maven.apache.org) and the ScalaTest Maven Plugin, +please refer to the [ScalaTest documentation](https://www.scalatest.org/user_guide/using_the_scalatest_maven_plugin), ## Running Tests Fully @@ -52,3 +52,4 @@ You can leverage the ready-made tool for creating a binary distribution. ```bash ./build/dist ``` + diff --git a/docs/extensions/engines/spark/functions.md b/docs/extensions/engines/spark/functions.md index b467a3abbc3..66f22aea860 100644 --- a/docs/extensions/engines/spark/functions.md +++ b/docs/extensions/engines/spark/functions.md @@ -1,31 +1,30 @@ - - +- Licensed to the Apache Software Foundation (ASF) under one or more +- contributor license agreements. See the NOTICE file distributed with +- this work for additional information regarding copyright ownership. +- The ASF licenses this file to You under the Apache License, Version 2.0 +- (the "License"); you may not use this file except in compliance with +- the License. You may obtain a copy of the License at +- +- http://www.apache.org/licenses/LICENSE-2.0 +- +- Unless required by applicable law or agreed to in writing, software +- distributed under the License is distributed on an "AS IS" BASIS, +- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- See the License for the specific language governing permissions and +- limitations under the License. +--> + # Auxiliary SQL Functions Kyuubi provides several auxiliary SQL functions as supplement to Spark's [Built-in Functions](https://spark.apache.org/docs/latest/api/sql/index.html#built-in-functions) -Name | Description | Return Type | Since ---- | --- | --- | --- -kyuubi_version | Return the version of Kyuubi Server | string | 1.3.0 -engine_name | Return the spark application name for the associated query engine | string | 1.3.0 -engine_id | Return the spark application id for the associated query engine | string | 1.4.0 -system_user | Return the system user name for the associated query engine | string | 1.3.0 -session_user | Return the session username for the associated query engine | string | 1.4.0 +| Name | Description | Return Type | Since | +|----------------|-------------------------------------------------------------------|-------------|-------| +| kyuubi_version | Return the version of Kyuubi Server | string | 1.3.0 | +| engine_name | Return the spark application name for the associated query engine | string | 1.3.0 | +| engine_id | Return the spark application id for the associated query engine | string | 1.4.0 | +| system_user | Return the system user name for the associated query engine | string | 1.3.0 | +| session_user | Return the session username for the associated query engine | string | 1.4.0 | diff --git a/docs/extensions/engines/spark/jdbc-dialect.md b/docs/extensions/engines/spark/jdbc-dialect.md index 82aa453f397..e22c3392669 100644 --- a/docs/extensions/engines/spark/jdbc-dialect.md +++ b/docs/extensions/engines/spark/jdbc-dialect.md @@ -1,28 +1,26 @@ - +- Licensed to the Apache Software Foundation (ASF) under one or more +- contributor license agreements. See the NOTICE file distributed with +- this work for additional information regarding copyright ownership. +- The ASF licenses this file to You under the Apache License, Version 2.0 +- (the "License"); you may not use this file except in compliance with +- the License. You may obtain a copy of the License at +- +- http://www.apache.org/licenses/LICENSE-2.0 +- +- Unless required by applicable law or agreed to in writing, software +- distributed under the License is distributed on an "AS IS" BASIS, +- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- See the License for the specific language governing permissions and +- limitations under the License. +--> # Hive Dialect Support Hive Dialect plugin aims to provide Hive Dialect support to Spark's JDBC source. It will auto registered to Spark and applied to JDBC sources with url prefix of `jdbc:hive2://` or `jdbc:kyuubi://`. -Hive Dialect helps to solve failures access Kyuubi. It fails and unexpected results when querying data from Kyuubi as JDBC source with Hive JDBC Driver or Kyuubi Hive JDBC Driver in Spark, as Spark JDBC provides no Hive Dialect support out of box and quoting columns and other identifiers in ANSI as "table.column" rather than in HiveSQL style as \`table\`.\`column\`. - +Hive Dialect helps to solve failures access Kyuubi. It fails and unexpected results when querying data from Kyuubi as JDBC source with Hive JDBC Driver or Kyuubi Hive JDBC Driver in Spark, as Spark JDBC provides no Hive Dialect support out of box and quoting columns and other identifiers in ANSI as "table.column" rather than in HiveSQL style as \`table\`.\`column\`. ## Features @@ -33,10 +31,11 @@ Hive Dialect helps to solve failures access Kyuubi. It fails and unexpected resu ## Usage 1. Get the Kyuubi Hive Dialect Extension jar - 1. compile the extension by executing `build/mvn clean package -pl :kyuubi-extension-spark-jdbc-dialect_2.12 -DskipTests` - 2. get the extension jar under `extensions/spark/kyuubi-extension-spark-jdbc-dialect/target` - 3. If you like, you can compile the extension jar with the corresponding Maven's profile on you compile command, i.e. you can get extension jar for Spark 3.2 by compiling with `-Pspark-3.1` + 1. compile the extension by executing `build/mvn clean package -pl :kyuubi-extension-spark-jdbc-dialect_2.12 -DskipTests` + 2. get the extension jar under `extensions/spark/kyuubi-extension-spark-jdbc-dialect/target` + 3. If you like, you can compile the extension jar with the corresponding Maven's profile on you compile command, i.e. you can get extension jar for Spark 3.2 by compiling with `-Pspark-3.1` 2. Put the Kyuubi Hive Dialect Extension jar `kyuubi-extension-spark-jdbc-dialect_-*.jar` into `$SPARK_HOME/jars` -3. Enable `KyuubiSparkJdbcDialectExtension`, by setting `spark.sql.extensions=org.apache.spark.sql.dialect.KyuubiSparkJdbcDialectExtension`, i.e. +3. Enable `KyuubiSparkJdbcDialectExtension`, by setting `spark.sql.extensions=org.apache.spark.sql.dialect.KyuubiSparkJdbcDialectExtension`, i.e. - add a config into `$SPARK_HOME/conf/spark-defaults.conf` - or add setting config in SparkSession builder + diff --git a/docs/extensions/engines/spark/lineage.md b/docs/extensions/engines/spark/lineage.md index 59cb6187bd8..cd38be4ba12 100644 --- a/docs/extensions/engines/spark/lineage.md +++ b/docs/extensions/engines/spark/lineage.md @@ -1,19 +1,19 @@ +- Licensed to the Apache Software Foundation (ASF) under one or more +- contributor license agreements. See the NOTICE file distributed with +- this work for additional information regarding copyright ownership. +- The ASF licenses this file to You under the Apache License, Version 2.0 +- (the "License"); you may not use this file except in compliance with +- the License. You may obtain a copy of the License at +- +- http://www.apache.org/licenses/LICENSE-2.0 +- +- Unless required by applicable law or agreed to in writing, software +- distributed under the License is distributed on an "AS IS" BASIS, +- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- See the License for the specific language governing permissions and +- limitations under the License. +--> # SQL Lineage Support @@ -25,13 +25,14 @@ The source table is related to the result set, which is the SQL Lineage. ## Introduction The current lineage parsing functionality is implemented as a plugin by extending Spark's `QueryExecutionListener`. -1. The `SparkListenerSQLExecutionEnd` event is triggered after the SQL execution is finished and captured by the `QueryExecuctionListener`, - where the SQL lineage parsing process is performed on the successfully executed SQL. +1. The `SparkListenerSQLExecutionEnd` event is triggered after the SQL execution is finished and captured by the `QueryExecuctionListener`, +where the SQL lineage parsing process is performed on the successfully executed SQL. 2. Will write the parsed lineage information to the log file in JSON format. ### Example When the following SQL is executed: + ```sql ## table create table test_table0(a string, b string) @@ -39,7 +40,9 @@ create table test_table0(a string, b string) ## query select a as col0, b as col1 from test_table0 ``` + The lineage of this SQL: + ```json { "inputTables": ["default.test_table0"], @@ -56,21 +59,22 @@ The lineage of this SQL: #### Lineage specific identification -- `__count__`. Means that the column is an `count(*)` aggregate expression +- `__count__`. Means that the column is an `count(*)` aggregate expression and cannot extract the specific column. Lineage of the column like `default.test_table0.__count__`. - `__local__`. Means that the lineage of the table is a `LocalRelation` and not the real table, like `__local__.a` - ### SQL type support Currently supported column lineage for spark's `Command` and `Query` type: #### Query + - `Select` #### Command + - `AlterViewAsCommand` - `AppendData` - `CreateDataSourceTableAsSelectCommand` @@ -89,12 +93,11 @@ Currently supported column lineage for spark's `Command` and `Query` type: - `ReplaceTableAsSelect` - `SaveIntoDataSourceCommand` - ## Building ### Build with Apache Maven -Kyuubi Spark Lineage Listener Extension is built using [Apache Maven](http://maven.apache.org). +Kyuubi Spark Lineage Listener Extension is built using [Apache Maven](https://maven.apache.org). To build it, `cd` to the root direct of kyuubi project and run: ```shell @@ -120,19 +123,19 @@ build/mvn clean package -pl :kyuubi-spark-lineage_2.12 -DskipTests -Dspark.versi The available `spark.version`s are shown in the following table. -| Spark Version | Supported | Remark | -|:-----------------:|:-----------:|:--------------------------------------------------------------------------------------------------------------------------------:| -| master | √ | - | -| 3.3.x | √ | - | -| 3.2.x | √ | - | -| 3.1.x | √ | - | -| 3.0.x | √ | - | -| 2.4.x | x | - | +| Spark Version | Supported | Remark | +|:-------------:|:---------:|:------:| +| master | √ | - | +| 3.3.x | √ | - | +| 3.2.x | √ | - | +| 3.1.x | √ | - | +| 3.0.x | √ | - | +| 2.4.x | x | - | Currently, Spark released with Scala 2.12 are supported. - ### Test with ScalaTest Maven plugin + If you omit `-DskipTests` option in the command above, you will also get all unit tests run. ```shell @@ -149,8 +152,6 @@ The tests will suspend at startup and wait for a remote debugger to attach to th We will appreciate if you can share the bug or the fix to the Kyuubi community. - - ## Installing With the `kyuubi-spark-lineage_*.jar` and its transitive dependencies available for spark runtime classpath, such as @@ -167,15 +168,43 @@ Add `org.apache.kyuubi.plugin.lineage.SparkOperationLineageQueryExecutionListene spark.sql.queryExecutionListeners=org.apache.kyuubi.plugin.lineage.SparkOperationLineageQueryExecutionListener ``` -### Settings for Lineage Logger and Path +### Optional configuration + +#### Whether to Skip Permanent View Resolution + +If enabled, lineage resolution will stop at permanent views and treats them as physical tables. We need +to add one configurations. + +```properties +spark.kyuubi.plugin.lineage.skip.parsing.permanent.view.enabled=true +``` + +### Get Lineage Events + +The lineage dispatchers are used to dispatch lineage events, configured via `spark.kyuubi.plugin.lineage.dispatchers`. -#### Lineage Logger Path -The location of all the engine operation lineage events go for the builtin JSON logger. -We first need set `kyuubi.engine.event.loggers` to `JSON`. -All operation lineage events will be written in the unified event json logger path, which be setting with -`kyuubi.engine.event.json.log.path`. We can get the lineage logger from the `operation_lineage` dir in the -`kyuubi.engine.event.json.log.path`. +
        +
      • SPARK_EVENT (by default): send lineage event to spark event bus
      • +
      • KYUUBI_EVENT: send lineage event to kyuubi event bus
      • +
      +#### Get Lineage Events from SparkListener +When using the `SPARK_EVENT` dispatcher, the lineage events will be sent to the `SparkListenerBus`. To handle lineage events, a new `SparkListener` needs to be added. +Example for Adding `SparkListener`: + +```scala +spark.sparkContext.addSparkListener(new SparkListener { + override def onOtherEvent(event: SparkListenerEvent): Unit = { + event match { + case lineageEvent: OperationLineageEvent => + // Your processing logic + case _ => + } + } + }) +``` +#### Get Lineage Events from Kyuubi EventHandler +When using the `KYUUBI_EVENT` dispatcher, the lineage events will be sent to the Kyuubi `EventBus`. Refer to [Kyuubi Event Handler](../../server/events) to handle kyuubi events. diff --git a/docs/extensions/engines/spark/rules.md b/docs/extensions/engines/spark/rules.md index 535804dc241..a4bda5d53ff 100644 --- a/docs/extensions/engines/spark/rules.md +++ b/docs/extensions/engines/spark/rules.md @@ -1,20 +1,19 @@ - +- Licensed to the Apache Software Foundation (ASF) under one or more +- contributor license agreements. See the NOTICE file distributed with +- this work for additional information regarding copyright ownership. +- The ASF licenses this file to You under the Apache License, Version 2.0 +- (the "License"); you may not use this file except in compliance with +- the License. You may obtain a copy of the License at +- +- http://www.apache.org/licenses/LICENSE-2.0 +- +- Unless required by applicable law or agreed to in writing, software +- distributed under the License is distributed on an "AS IS" BASIS, +- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- See the License for the specific language governing permissions and +- limitations under the License. +--> # Auxiliary Optimization Rules @@ -24,19 +23,17 @@ And don't worry, Kyuubi will support the new Apache Spark version in the future. ## Features - merging small files automatically - + Small files is a long time issue with Apache Spark. Kyuubi can merge small files by adding an extra shuffle. Currently, Kyuubi supports handle small files with datasource table and hive table, and also Kyuubi support optimize dynamic partition insertion. For example, a common write query `INSERT INTO TABLE $table1 SELECT * FROM $table2`, Kyuubi will introduce an extra shuffle before write and then the small files will go away. - - insert shuffle node before Join to make AQE OptimizeSkewedJoin work In current implementation, Apache Spark can only optimize skewed join by the standard join which means a join must have two sort and shuffle node. However, in complex scenario this assuming will be broken easily. Kyuubi can guarantee the join is standard by adding an extra shuffle node before join. So that, OptimizeSkewedJoin can work better. - - stage level config isolation in AQE As we know, `spark.sql.adaptive.advisoryPartitionSizeInBytes` is a key config in Apache Spark AQE. @@ -44,7 +41,6 @@ And don't worry, Kyuubi will support the new Apache Spark version in the future. However, in general, we expect a file is big enough like 256MB or 512MB. Kyuubi can make the config isolation to solve the conflict so that we can make staging partition data size small and last partition data size big. - ## Usage | Kyuubi Spark SQL extension | Supported Spark version(s) | Available since | EOL | Bundled in Binary release tarball | Maven profile | @@ -63,28 +59,35 @@ And don't worry, Kyuubi will support the new Apache Spark version in the future. Now, you can enjoy the Kyuubi SQL Extension. - ## Additional Configurations Kyuubi provides some configs to make these feature easy to use. -| Name | Default Value | Description | Since | -|---------------------------------------------------------------------|---------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------| -| spark.sql.optimizer.insertRepartitionBeforeWrite.enabled | true | Add repartition node at the top of query plan. An approach of merging small files. | 1.2.0 | -| spark.sql.optimizer.insertRepartitionNum | none | The partition number if `spark.sql.optimizer.insertRepartitionBeforeWrite.enabled` is enabled. If AQE is disabled, the default value is `spark.sql.shuffle.partitions`. If AQE is enabled, the default value is none that means depend on AQE. | 1.2.0 | -| spark.sql.optimizer.dynamicPartitionInsertionRepartitionNum | 100 | The partition number of each dynamic partition if `spark.sql.optimizer.insertRepartitionBeforeWrite.enabled` is enabled. We will repartition by dynamic partition columns to reduce the small file but that can cause data skew. This config is to extend the partition of dynamic partition column to avoid skew but may generate some small files. | 1.2.0 | -| spark.sql.optimizer.forceShuffleBeforeJoin.enabled | false | Ensure shuffle node exists before shuffled join (shj and smj) to make AQE `OptimizeSkewedJoin` works (complex scenario join, multi table join). | 1.2.0 | -| spark.sql.optimizer.finalStageConfigIsolation.enabled | false | If true, the final stage support use different config with previous stage. The prefix of final stage config key should be `spark.sql.finalStage.`. For example, the raw spark config: `spark.sql.adaptive.advisoryPartitionSizeInBytes`, then the final stage config should be: `spark.sql.finalStage.adaptive.advisoryPartitionSizeInBytes`. | 1.2.0 | -| spark.sql.analyzer.classification.enabled | false | When true, allows Kyuubi engine to judge this SQL's classification and set `spark.sql.analyzer.classification` back into sessionConf. Through this configuration item, Spark can optimizing configuration dynamic. | 1.4.0 | -| spark.sql.optimizer.insertZorderBeforeWriting.enabled | true | When true, we will follow target table properties to insert zorder or not. The key properties are: 1) `kyuubi.zorder.enabled`: if this property is true, we will insert zorder before writing data. 2) `kyuubi.zorder.cols`: string split by comma, we will zorder by these cols. | 1.4.0 | -| spark.sql.optimizer.zorderGlobalSort.enabled | true | When true, we do a global sort using zorder. Note that, it can cause data skew issue if the zorder columns have less cardinality. When false, we only do local sort using zorder. | 1.4.0 | -| spark.sql.watchdog.maxPartitions | none | Set the max partition number when spark scans a data source. Enable MaxPartitionStrategy by specifying this configuration. Add maxPartitions Strategy to avoid scan excessive partitions on partitioned table, it's optional that works with defined | 1.4.0 | -| spark.sql.optimizer.dropIgnoreNonExistent | false | When true, do not report an error if DROP DATABASE/TABLE/VIEW/FUNCTION/PARTITION specifies a non-existent database/table/view/function/partition | 1.5.0 | -| spark.sql.optimizer.rebalanceBeforeZorder.enabled | false | When true, we do a rebalance before zorder in case data skew. Note that, if the insertion is dynamic partition we will use the partition columns to rebalance. Note that, this config only affects with Spark 3.3.x. | 1.6.0 | -| spark.sql.optimizer.rebalanceZorderColumns.enabled | false | When true and `spark.sql.optimizer.rebalanceBeforeZorder.enabled` is true, we do rebalance before Z-Order. If it's dynamic partition insert, the rebalance expression will include both partition columns and Z-Order columns. Note that, this config only affects with Spark 3.3.x. | 1.6.0 | -| spark.sql.optimizer.twoPhaseRebalanceBeforeZorder.enabled | false | When true and `spark.sql.optimizer.rebalanceBeforeZorder.enabled` is true, we do two phase rebalance before Z-Order for the dynamic partition write. The first phase rebalance using dynamic partition column; The second phase rebalance using dynamic partition column Z-Order columns. Note that, this config only affects with Spark 3.3.x. | 1.6.0 | -| spark.sql.optimizer.zorderUsingOriginalOrdering.enabled | false | When true and `spark.sql.optimizer.rebalanceBeforeZorder.enabled` is true, we do sort by the original ordering i.e. lexicographical order. Note that, this config only affects with Spark 3.3.x. | 1.6.0 | -| spark.sql.optimizer.inferRebalanceAndSortOrders.enabled | false | When ture, infer columns for rebalance and sort orders from original query, e.g. the join keys from join. It can avoid compression ratio regression. | 1.7.0 | -| spark.sql.optimizer.inferRebalanceAndSortOrdersMaxColumns | 3 | The max columns of inferred columns. | 1.7.0 | -| spark.sql.optimizer.insertRepartitionBeforeWriteIfNoShuffle.enabled | false | When true, add repartition even if the original plan does not have shuffle. | 1.7.0 | -| spark.sql.optimizer.finalStageConfigIsolationWriteOnly.enabled | true | When true, only enable final stage isolation for writing. | 1.7.0 | \ No newline at end of file +| Name | Default Value | Description | Since | +|---------------------------------------------------------------------|----------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------| +| spark.sql.optimizer.insertRepartitionBeforeWrite.enabled | true | Add repartition node at the top of query plan. An approach of merging small files. | 1.2.0 | +| spark.sql.optimizer.insertRepartitionNum | none | The partition number if `spark.sql.optimizer.insertRepartitionBeforeWrite.enabled` is enabled. If AQE is disabled, the default value is `spark.sql.shuffle.partitions`. If AQE is enabled, the default value is none that means depend on AQE. | 1.2.0 | +| spark.sql.optimizer.dynamicPartitionInsertionRepartitionNum | 100 | The partition number of each dynamic partition if `spark.sql.optimizer.insertRepartitionBeforeWrite.enabled` is enabled. We will repartition by dynamic partition columns to reduce the small file but that can cause data skew. This config is to extend the partition of dynamic partition column to avoid skew but may generate some small files. | 1.2.0 | +| spark.sql.optimizer.forceShuffleBeforeJoin.enabled | false | Ensure shuffle node exists before shuffled join (shj and smj) to make AQE `OptimizeSkewedJoin` works (complex scenario join, multi table join). | 1.2.0 | +| spark.sql.optimizer.finalStageConfigIsolation.enabled | false | If true, the final stage support use different config with previous stage. The prefix of final stage config key should be `spark.sql.finalStage.`. For example, the raw spark config: `spark.sql.adaptive.advisoryPartitionSizeInBytes`, then the final stage config should be: `spark.sql.finalStage.adaptive.advisoryPartitionSizeInBytes`. | 1.2.0 | +| spark.sql.analyzer.classification.enabled | false | When true, allows Kyuubi engine to judge this SQL's classification and set `spark.sql.analyzer.classification` back into sessionConf. Through this configuration item, Spark can optimizing configuration dynamic. | 1.4.0 | +| spark.sql.optimizer.insertZorderBeforeWriting.enabled | true | When true, we will follow target table properties to insert zorder or not. The key properties are: 1) `kyuubi.zorder.enabled`: if this property is true, we will insert zorder before writing data. 2) `kyuubi.zorder.cols`: string split by comma, we will zorder by these cols. | 1.4.0 | +| spark.sql.optimizer.zorderGlobalSort.enabled | true | When true, we do a global sort using zorder. Note that, it can cause data skew issue if the zorder columns have less cardinality. When false, we only do local sort using zorder. | 1.4.0 | +| spark.sql.watchdog.maxPartitions | none | Set the max partition number when spark scans a data source. Enable MaxPartitionStrategy by specifying this configuration. Add maxPartitions Strategy to avoid scan excessive partitions on partitioned table, it's optional that works with defined | 1.4.0 | +| spark.sql.optimizer.dropIgnoreNonExistent | false | When true, do not report an error if DROP DATABASE/TABLE/VIEW/FUNCTION/PARTITION specifies a non-existent database/table/view/function/partition | 1.5.0 | +| spark.sql.optimizer.rebalanceBeforeZorder.enabled | false | When true, we do a rebalance before zorder in case data skew. Note that, if the insertion is dynamic partition we will use the partition columns to rebalance. Note that, this config only affects with Spark 3.3.x. | 1.6.0 | +| spark.sql.optimizer.rebalanceZorderColumns.enabled | false | When true and `spark.sql.optimizer.rebalanceBeforeZorder.enabled` is true, we do rebalance before Z-Order. If it's dynamic partition insert, the rebalance expression will include both partition columns and Z-Order columns. Note that, this config only affects with Spark 3.3.x. | 1.6.0 | +| spark.sql.optimizer.twoPhaseRebalanceBeforeZorder.enabled | false | When true and `spark.sql.optimizer.rebalanceBeforeZorder.enabled` is true, we do two phase rebalance before Z-Order for the dynamic partition write. The first phase rebalance using dynamic partition column; The second phase rebalance using dynamic partition column Z-Order columns. Note that, this config only affects with Spark 3.3.x. | 1.6.0 | +| spark.sql.optimizer.zorderUsingOriginalOrdering.enabled | false | When true and `spark.sql.optimizer.rebalanceBeforeZorder.enabled` is true, we do sort by the original ordering i.e. lexicographical order. Note that, this config only affects with Spark 3.3.x. | 1.6.0 | +| spark.sql.optimizer.inferRebalanceAndSortOrders.enabled | false | When ture, infer columns for rebalance and sort orders from original query, e.g. the join keys from join. It can avoid compression ratio regression. | 1.7.0 | +| spark.sql.optimizer.inferRebalanceAndSortOrdersMaxColumns | 3 | The max columns of inferred columns. | 1.7.0 | +| spark.sql.optimizer.insertRepartitionBeforeWriteIfNoShuffle.enabled | false | When true, add repartition even if the original plan does not have shuffle. | 1.7.0 | +| spark.sql.optimizer.finalStageConfigIsolationWriteOnly.enabled | true | When true, only enable final stage isolation for writing. | 1.7.0 | +| spark.sql.finalWriteStage.eagerlyKillExecutors.enabled | false | When true, eagerly kill redundant executors before running final write stage. | 1.8.0 | +| spark.sql.finalWriteStage.retainExecutorsFactor | 1.2 | If the target executors * factor < active executors, and target executors * factor > min executors, then inject kill executors or inject custom resource profile. | 1.8.0 | +| spark.sql.finalWriteStage.resourceIsolation.enabled | false | When true, make final write stage resource isolation using custom RDD resource profile. | 1.2.0 | +| spark.sql.finalWriteStageExecutorCores | fallback spark.executor.cores | Specify the executor core request for final write stage. It would be passed to the RDD resource profile. | 1.8.0 | +| spark.sql.finalWriteStageExecutorMemory | fallback spark.executor.memory | Specify the executor on heap memory request for final write stage. It would be passed to the RDD resource profile. | 1.8.0 | +| spark.sql.finalWriteStageExecutorMemoryOverhead | fallback spark.executor.memoryOverhead | Specify the executor memory overhead request for final write stage. It would be passed to the RDD resource profile. | 1.8.0 | +| spark.sql.finalWriteStageExecutorOffHeapMemory | NONE | Specify the executor off heap memory request for final write stage. It would be passed to the RDD resource profile. | 1.8.0 | + diff --git a/docs/extensions/engines/spark/z-order-benchmark.md b/docs/extensions/engines/spark/z-order-benchmark.md index d820eee19fd..293042530da 100644 --- a/docs/extensions/engines/spark/z-order-benchmark.md +++ b/docs/extensions/engines/spark/z-order-benchmark.md @@ -1,20 +1,19 @@ - +- x to the Apache Software Foundation (ASF) under one or more +- contributor license agreements. See the NOTICE file distributed with +- this work for additional information regarding copyright ownership. +- The ASF licenses this file to You under the Apache License, Version 2.0 +- (the "License"); you may not use this file except in compliance with +- the License. You may obtain a copy of the License at +- +- http://www.apache.org/licenses/LICENSE-2.0 +- +- Unless required by applicable law or agreed to in writing, software +- distributed under the License is distributed on an "AS IS" BASIS, +- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- See the License for the specific language governing permissions and +- limitations under the License. +--> # Z-order Benchmark @@ -76,7 +75,6 @@ INSERT overwrite table conn_order select src_ip, src_port, dst_ip, dst_port from OPTIMIZE conn_zorder ZORDER BY src_ip, src_port, dst_ip, dst_port; ``` - The complete code is as follows: ```shell @@ -152,7 +150,6 @@ spark.stop() Z-order Optimize statement: ```sql - set spark.sql.hive.convertMetastoreParquet=false; OPTIMIZE conn_zorder_only_ip ZORDER BY src_ip, dst_ip; @@ -166,13 +163,11 @@ ORDER BY statement: INSERT overwrite table conn_order_only_ip select src_ip, src_port, dst_ip, dst_port from conn_random_parquet order by src_ip, dst_ip; INSERT overwrite table conn_order select src_ip, src_port, dst_ip, dst_port from conn_random_parquet order by src_ip, src_port, dst_ip, dst_port; - ``` Query statement: ```sql - set spark.sql.hive.convertMetastoreParquet=true; select count(*) from conn_random_parquet where src_ip like '157%' and dst_ip like '216.%'; @@ -182,10 +177,9 @@ select count(*) from conn_zorder_only_ip where src_ip like '157%' and dst_ip lik select count(*) from conn_zorder where src_ip like '157%' and dst_ip like '216.%'; ``` - ## Benchmark result -We have done two performance tests: one is to compare the efficiency of Z-order Optimize and Order by Sort, +We have done two performance tests: one is to compare the efficiency of Z-order Optimize and Order by Sort, and the other is to query based on the optimized Z-order by data and Random data. ### Efficiency of Z-order Optimize and Order-by Sort @@ -194,17 +188,17 @@ and the other is to query based on the optimized Z-order by data and Random data Z-order by or order by only ip: -| Table | row count | optimize time | -| ------------------- | -------------- | ------------------ | -| conn_order_only_ip | 10,000,000,000 | 1591.99 s | -| conn_zorder_only_ip | 10,000,000,000 | 8371.405 s | +| Table | row count | optimize time | +|---------------------|----------------|----------------| +| conn_order_only_ip | 10,000,000,000 | 1591.99 s | +| conn_zorder_only_ip | 10,000,000,000 | 8371.405 s | Z-order by or order by all columns: -| Table | row count | optimize time | -| ------------------- | -------------- | ------------------ | -| conn_order | 10,000,000,000 | 1515.298 s | -| conn_zorder | 10,000,000,000 | 11057.194 s | +| Table | row count | optimize time | +|-------------|----------------|----------------| +| conn_order | 10,000,000,000 | 1515.298 s | +| conn_zorder | 10,000,000,000 | 11057.194 s | ### Z-order by benchmark result @@ -212,28 +206,24 @@ By querying the tables before and after optimization, we find that: **10 billion data and 200 files and Query resource: 200 core 600G memory** -| Table | Average File Size | Scan row count | Average query time | row count Skipping ratio | -| ------------------- | ----------------- | -------------- | ------------------ | ------------------------ | +| Table | Average File Size | Scan row count | Average query time | row count Skipping ratio | +|---------------------|-------------------|----------------|--------------------|--------------------------| | conn_random_parquet | 1.2 G | 10,000,000,000 | 27.554 s | 0.0% | | conn_zorder_only_ip | 890 M | 43,170,600 | 2.459 s | 99.568% | | conn_zorder | 890 M | 54,841,302 | 3.185 s | 99.451% | - - **10 billion data and 1000 files and Query resource: 200 core 600G memory** -| Table | Average File Size | Scan row count | Average query time | row count Skipping ratio | -| ------------------- | ----------------- | -------------- | ------------------ | ------------------------ | +| Table | Average File Size | Scan row count | Average query time | row count Skipping ratio | +|---------------------|-------------------|----------------|--------------------|--------------------------| | conn_random_parquet | 234.8 M | 10,000,000,000 | 27.031 s | 0.0% | | conn_zorder_only_ip | 173.9 M | 53,499,068 | 3.120 s | 99.465% | | conn_zorder | 174.0 M | 35,910,500 | 3.103 s | 99.640% | - - **1 billion data and 10000 files and Query resource: 10 core 40G memory** -| Table | Average File Size | Scan row count | Average query time | row count Skipping ratio | -| ------------------- | ----------------- | -------------- | ------------------ | ------------------------ | +| Table | Average File Size | Scan row count | Average query time | row count Skipping ratio | +|---------------------|-------------------|----------------|--------------------|--------------------------| | conn_random_parquet | 2.7 M | 1,000,000,000 | 76.772 s | 0.0% | | conn_zorder_only_ip | 2.1 M | 406,572 | 3.963 s | 99.959% | | conn_zorder | 2.2 M | 387,942 | 3.621s | 99.961% | diff --git a/docs/extensions/engines/spark/z-order.md b/docs/extensions/engines/spark/z-order.md index 0ac41529cbe..d04ca3e0c29 100644 --- a/docs/extensions/engines/spark/z-order.md +++ b/docs/extensions/engines/spark/z-order.md @@ -1,19 +1,19 @@ +- Licensed to the Apache Software Foundation (ASF) under one or more +- contributor license agreements. See the NOTICE file distributed with +- this work for additional information regarding copyright ownership. +- The ASF licenses this file to You under the Apache License, Version 2.0 +- (the "License"); you may not use this file except in compliance with +- the License. You may obtain a copy of the License at +- +- http://www.apache.org/licenses/LICENSE-2.0 +- +- Unless required by applicable law or agreed to in writing, software +- distributed under the License is distributed on an "AS IS" BASIS, +- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- See the License for the specific language governing permissions and +- limitations under the License. +--> # Z-Ordering Support @@ -22,7 +22,6 @@ stored in all kind of storage with various data format. Please check our benchmark report [here](z-order-benchmark.md). - ## Introduction The following picture shows the workflow of z-order. @@ -32,7 +31,7 @@ The following picture shows the workflow of z-order. It contains three parties: - Upstream - Due to the extra sort, the upstream job will run a little slower than before +Due to the extra sort, the upstream job will run a little slower than before - Table @@ -46,31 +45,31 @@ It contains three parties: | Table Format | Supported | |--------------|-----------| -| parquet | Y | -| orc | Y | -| json | N | -| csv | N | -| text | N | +| parquet | Y | +| orc | Y | +| json | N | +| csv | N | +| text | N | ### Supported column data type | Column Data Type | Supported | |------------------|-----------| -| byte | Y | -| short | Y | -| int | Y | -| long | Y | -| float | Y | -| double | Y | -| boolean | Y | -| string | Y | -| decimal | Y | -| date | Y | -| timestamp | Y | -| array | N | -| map | N | -| struct | N | -| udt | N | +| byte | Y | +| short | Y | +| int | Y | +| long | Y | +| float | Y | +| double | Y | +| boolean | Y | +| string | Y | +| decimal | Y | +| date | Y | +| timestamp | Y | +| array | N | +| map | N | +| struct | N | +| udt | N | ## How to use @@ -86,6 +85,7 @@ Due to the extension, z-order only works with Spark-3.1 and higher version. If you want to optimize the history data of a table, the `OPTIMIZE ...` syntax is good to go. Due to Spark SQL doesn't support read and overwrite same datasource table, the syntax can only support to optimize Hive table. #### Syntax + ```sql OPTIMIZE table_name [WHERE predicate] ZORDER BY col_name1 [, ...] ``` @@ -93,6 +93,7 @@ OPTIMIZE table_name [WHERE predicate] ZORDER BY col_name1 [, ...] Note that, the `predicate` only supports partition spec. #### Examples + ```sql OPTIMIZE t1 ZORDER BY c3; @@ -104,9 +105,11 @@ OPTIMIZE t1 WHERE day = '2021-12-01' ZORDER BY c1,c2; ### Optimize incremental data Kyuubi supports optimize a table automatically for incremental data. e.g., time partitioned table. The only things you need to do is adding Kyuubi properties into the target table properties: + ```sql ALTER TABLE t1 SET TBLPROPERTIES('kyuubi.zorder.enabled'='true','kyuubi.zorder.cols'='c1,c2'); ``` + - the key `kyuubi.zorder.enabled` decide if the table allows Kyuubi to optimize by z-order. - the key `kyuubi.zorder.cols` decide which columns are used to optimize by z-order. @@ -119,3 +122,4 @@ INSERT OVERWRITE TABLE t1 PARTITION() ...; CREATE TABLE t1 AS SELECT ...; ``` + diff --git a/docs/imgs/kyuubi_ecosystem.drawio b/docs/imgs/kyuubi_ecosystem.drawio index 723b306e825..7171491efec 100644 --- a/docs/imgs/kyuubi_ecosystem.drawio +++ b/docs/imgs/kyuubi_ecosystem.drawio @@ -1 +1 @@  \ No newline at end of file  \ No newline at end of file diff --git a/docs/imgs/kyuubi_ecosystem.drawio.png b/docs/imgs/kyuubi_ecosystem.drawio.png index 19de7adb52e..72d221d1040 100644 Binary files a/docs/imgs/kyuubi_ecosystem.drawio.png and b/docs/imgs/kyuubi_ecosystem.drawio.png differ diff --git a/docs/make.bat b/docs/make.bat index 1f441aefc55..b8c48a2dba0 100644 --- a/docs/make.bat +++ b/docs/make.bat @@ -38,7 +38,7 @@ if errorlevel 9009 ( echo.may add the Sphinx directory to PATH. echo. echo.If you don't have Sphinx installed, grab it from - echo.http://sphinx-doc.org/ + echo.https://www.sphinx-doc.org/ exit /b 1 ) diff --git a/docs/monitor/events.md b/docs/monitor/events.md index 3358d5766f4..fd8d91ca026 100644 --- a/docs/monitor/events.md +++ b/docs/monitor/events.md @@ -1,19 +1,19 @@ - +- Licensed to the Apache Software Foundation (ASF) under one or more +- contributor license agreements. See the NOTICE file distributed with +- this work for additional information regarding copyright ownership. +- The ASF licenses this file to You under the Apache License, Version 2.0 +- (the "License"); you may not use this file except in compliance with +- the License. You may obtain a copy of the License at +- +- http://www.apache.org/licenses/LICENSE-2.0 +- +- Unless required by applicable law or agreed to in writing, software +- distributed under the License is distributed on an "AS IS" BASIS, +- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- See the License for the specific language governing permissions and +- limitations under the License. +--> # Monitoring Kyuubi - Events System + diff --git a/docs/monitor/logging.md b/docs/monitor/logging.md index 57c673c25f2..24a5a88d699 100644 --- a/docs/monitor/logging.md +++ b/docs/monitor/logging.md @@ -1,20 +1,19 @@ - +- Licensed to the Apache Software Foundation (ASF) under one or more +- contributor license agreements. See the NOTICE file distributed with +- this work for additional information regarding copyright ownership. +- The ASF licenses this file to You under the Apache License, Version 2.0 +- (the "License"); you may not use this file except in compliance with +- the License. You may obtain a copy of the License at +- +- http://www.apache.org/licenses/LICENSE-2.0 +- +- Unless required by applicable law or agreed to in writing, software +- distributed under the License is distributed on an "AS IS" BASIS, +- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- See the License for the specific language governing permissions and +- limitations under the License. +--> # Monitoring Kyuubi - Logging System @@ -259,10 +258,12 @@ You will both get the final results and the corresponding operation logs telling +-------------------------------------------------+--------------------+ 1 row selected (0.341 seconds) ``` + ## Further Readings - [Monitoring Kyuubi - Events System](events.md) - [Monitoring Kyuubi - Server Metrics](metrics.md) - [Trouble Shooting](trouble_shooting.md) - Spark Online Documentation - - [Monitoring and Instrumentation](http://spark.apache.org/docs/latest/monitoring.html) + - [Monitoring and Instrumentation](https://spark.apache.org/docs/latest/monitoring.html) + diff --git a/docs/monitor/metrics.md b/docs/monitor/metrics.md index 0a27cf43a40..561014c370c 100644 --- a/docs/monitor/metrics.md +++ b/docs/monitor/metrics.md @@ -1,89 +1,95 @@ +- Licensed to the Apache Software Foundation (ASF) under one or more +- contributor license agreements. See the NOTICE file distributed with +- this work for additional information regarding copyright ownership. +- The ASF licenses this file to You under the Apache License, Version 2.0 +- (the "License"); you may not use this file except in compliance with +- the License. You may obtain a copy of the License at +- +- http://www.apache.org/licenses/LICENSE-2.0 +- +- Unless required by applicable law or agreed to in writing, software +- distributed under the License is distributed on an "AS IS" BASIS, +- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- See the License for the specific language governing permissions and +- limitations under the License. +--> # Monitoring Kyuubi - Server Metrics Kyuubi has a configurable metrics system based on the [Dropwizard Metrics Library](https://metrics.dropwizard.io/). -This allows users to report Kyuubi metrics to a variety of `kyuubi.metrics.reporters`. +This allows users to report Kyuubi metrics to a variety of `kyuubi.metrics.reporters`. The metrics provide instrumentation for specific activities and Kyuubi server. ## Configurations The metrics system is configured via `$KYUUBI_HOME/conf/kyuubi-defaults.conf`. -Key | Default | Meaning | Type | Since ---- | --- | --- | --- | --- -`kyuubi.metrics.enabled`|
      true
      |
      Set to true to enable kyuubi metrics system
      |
      boolean
      |
      1.2.0
      -`kyuubi.metrics.reporters`|
      JSON
      |
      A comma separated list for all metrics reporters
      • CONSOLE - ConsoleReporter which outputs measurements to CONSOLE periodically.
      • JMX - JmxReporter which listens for new metrics and exposes them as MBeans.
      • JSON - JsonReporter which outputs measurements to json file periodically.
      • PROMETHEUS - PrometheusReporter which exposes metrics in prometheus format.
      • SLF4J - Slf4jReporter which outputs measurements to system log periodically.
      |
      seq
      |
      1.2.0
      -`kyuubi.metrics.console.interval`|
      PT5S
      |
      How often should report metrics to console
      |
      duration
      |
      1.2.0
      -`kyuubi.metrics.json.interval`|
      PT5S
      |
      How often should report metrics to json file
      |
      duration
      |
      1.2.0
      -`kyuubi.metrics.json.location`|
      metrics
      |
      Where the json metrics file located
      |
      string
      |
      1.2.0
      -`kyuubi.metrics.prometheus.path`|
      /metrics
      |
      URI context path of prometheus metrics HTTP server
      |
      string
      |
      1.2.0
      -`kyuubi.metrics.prometheus.port`|
      10019
      |
      Prometheus metrics HTTP server port
      |
      int
      |
      1.2.0
      -`kyuubi.metrics.slf4j.interval`|
      PT5S
      |
      How often should report metrics to SLF4J logger
      |
      duration
      |
      1.2.0
      +| Key | Default | Meaning | Type | Since | +|-----------------------------------|-----------------------------------------------------------------------------------||-----------------------------------------|--------------------------------------| +| `kyuubi.metrics.enabled` |
      true
      |
      Set to true to enable kyuubi metrics system
      |
      boolean
      |
      1.2.0
      | +| `kyuubi.metrics.reporters` |
      JSON
      |
      A comma-separated list for all metrics reporters
      • CONSOLE - ConsoleReporter which outputs measurements to CONSOLE periodically.
      • JMX - JmxReporter which listens for new metrics and exposes them as MBeans.
      • JSON - JsonReporter which outputs measurements to json file periodically.
      • PROMETHEUS - PrometheusReporter which exposes metrics in Prometheus format.
      • SLF4J - Slf4jReporter which outputs measurements to system log periodically.
      |
      seq
      |
      1.2.0
      | +| `kyuubi.metrics.console.interval` |
      PT5S
      |
      How often should report metrics to console
      |
      duration
      |
      1.2.0
      | +| `kyuubi.metrics.json.interval` |
      PT5S
      |
      How often should report metrics to JSON file
      |
      duration
      |
      1.2.0
      | +| `kyuubi.metrics.json.location` |
      metrics
      |
      Where the JSON metrics file located
      |
      string
      |
      1.2.0
      | +| `kyuubi.metrics.prometheus.path` |
      /metrics
      |
      URI context path of prometheus metrics HTTP server
      |
      string
      |
      1.2.0
      | +| `kyuubi.metrics.prometheus.port` |
      10019
      |
      Prometheus metrics HTTP server port
      |
      int
      |
      1.2.0
      | +| `kyuubi.metrics.slf4j.interval` |
      PT5S
      |
      How often should report metrics to SLF4J logger
      |
      duration
      |
      1.2.0
      | ## Metrics These metrics include: -Metrics Prefix | Metrics Suffix | Type | Since | Description ----|---|---|---|--- -`kyuubi.exec.pool.threads.alive` | | gauge | 1.2.0 |
      threads keepAlive in the backend executive thread pool
      -`kyuubi.exec.pool.threads.active` | | gauge | 1.2.0 |
      threads active in the backend executive thread pool
      -`kyuubi.connection.total` | | counter | 1.2.0 |
      cumulative connection count
      -`kyuubi.connection.opened` | | gauge | 1.2.0 |
      current active connection count
      -`kyuubi.connection.opened` | `${user}` | counter | 1.2.0 |
      current active connections count requested by a `${user}`
      -`kyuubi.connection.failed` | | counter | 1.2.0 |
      cumulative failed connection count
      -`kyuubi.connection.failed` | `${user}` | counter | 1.2.0 |
      cumulative failed connections for a `${user}`
      -`kyuubi.operation.total` | | counter | 1.5.0 |
      cumulative opened operation count
      -`kyuubi.operation.total` | `${operationType}` | counter | 1.5.0 |
      cumulative opened count for the operation `${operationType}`
      -`kyuubi.operation.opened` | | gauge | 1.5.0 |
      current opened operation count
      -`kyuubi.operation.opened` | `${operationType}` | counter | 1.5.0 |
      current opened count for the operation `${operationType}`
      -`kyuubi.operation.failed` | `${operationType}`
      `.${errorType}` | counter | 1.5.0 |
      cumulative failed count for the operation `${operationType}` with a particular `${errorType}`, e.g. `execute_statement.AnalysisException`
      -`kyuubi.operation.state` | `${operationState}` | meter | 1.5.0 |
      kyuubi operation state rate
      -`kyuubi.engine.total` | | counter | 1.2.0 |
      cumulative created engines
      -`kyuubi.engine.timeout` | | counter | 1.2.0 |
      cumulative timeout engines
      -`kyuubi.engine.failed` | `${user}` | counter | 1.2.0 |
      cumulative explicitly failed engine count for a `${user}`
      -`kyuubi.engine.failed` | `${errorType}` | counter | 1.2.0 |
      cumulative explicitly failed engine count for a particular `${errorType}`, e.g. `ClassNotFoundException`
      -`kyuubi.backend_service.open_session` | | timer | 1.5.0 |
      kyuubi backend service `openSession` method execution time and rate
      -`kyuubi.backend_service.close_session` | | timer | 1.5.0 |
      kyuubi backend service `closeSession` method execution time and rate
      -`kyuubi.backend_service.get_info` | | timer | 1.5.0 |
      kyuubi backend service `getInfo` method execution time and rate
      -`kyuubi.backend_service.execute_statement` | | timer | 1.5.0 |
      kyuubi backend service `executeStatement` method execution time and rate
      -`kyuubi.backend_service.get_type_info` | | timer | 1.5.0 |
      kyuubi backend service `getTypeInfo` method execution time and rate
      -`kyuubi.backend_service.get_catalogs` | | timer | 1.5.0 |
      kyuubi backend service `getCatalogs` method execution time and rate
      -`kyuubi.backend_service.get_schemas` | | timer | 1.5.0 |
      kyuubi backend service `getSchemas` method execution time and rate
      -`kyuubi.backend_service.get_tables` | | timer | 1.5.0 |
      kyuubi backend service `getTables` method execution time and rate
      -`kyuubi.backend_service.get_table_types` | | timer | 1.5.0 |
      kyuubi backend service `getTableTypes` method execution time and rate
      -`kyuubi.backend_service.get_columns` | | timer | 1.5.0 |
      kyuubi backend service `getColumns` method execution time and rate
      -`kyuubi.backend_service.get_functions` | | timer | 1.5.0 |
      kyuubi backend service `getFunctions` method execution time and rate
      -`kyuubi.backend_service.get_operation_status` | | timer | 1.5.0 |
      kyuubi backend service `getOperationStatus` method execution time and rate
      -`kyuubi.backend_service.cancel_operation` | | timer | 1.5.0 |
      kyuubi backend service `cancelOperation` method execution time and rate
      -`kyuubi.backend_service.close_operation` | | timer | 1.5.0 |
      kyuubi backend service `closeOperation` method execution time and rate
      -`kyuubi.backend_service.get_result_set_metadata` | | timer | 1.5.0 |
      kyuubi backend service `getResultSetMetadata` method execution time and rate
      -`kyuubi.backend_service.fetch_results` | | timer | 1.5.0 |
      kyuubi backend service `fetchResults` method execution time and rate
      -`kyuubi.backend_service.fetch_log_rows_rate` | | meter | 1.5.0 |
      kyuubi backend service `fetchResults` method that fetch log rows rate
      -`kyuubi.backend_service.fetch_result_rows_rate` | | meter | 1.5.0 |
      kyuubi backend service `fetchResults` method that fetch result rows rate
      -`kyuubi.backend_service.get_primary_keys` | | meter | 1.6.0 |
      kyuubi backend service `get_primary_keys` method execution time and rate
      -`kyuubi.backend_service.get_cross_reference` | | meter | 1.6.0 |
      kyuubi backend service `get_cross_reference` method execution time and rate
      -`kyuubi.operation.state` | `${operationType}`
      `.${state}` | meter | 1.6.0 |
      The `${operationType}` with a particular `${state}` rate, e.g. `BatchJobSubmission.pending`, `BatchJobSubmission.finished`. Note that, the terminal states are cumulative, but the intermediate ones are not.
      -`kyuubi.metadata.request.opened` | | counter | 1.6.1 |
      current opened count for the metadata requests
      -`kyuubi.metadata.request.total` | | meter | 1.6.0 |
      metadata requests time and rate
      -`kyuubi.metadata.request.failed` | | meter | 1.6.0 |
      metadata requests failure time and rate
      -`kyuubi.metadata.request.retrying` | | meter | 1.6.0 |
      retrying metadata requests time and rate, it is not cumulative
      +| Metrics Prefix | Metrics Suffix | Type | Since | Description | +|--------------------------------------------------|----------------------------------------|-----------|-------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `kyuubi.exec.pool.threads.alive` | | gauge | 1.2.0 |
      threads keepAlive in the backend executive thread pool
      | +| `kyuubi.exec.pool.threads.active` | | gauge | 1.2.0 |
      threads active in the backend executive thread pool
      | +| `kyuubi.exec.pool.work_queue.size` | | gauge | 1.7.0 |
      work queue size in the backend executive thread pool
      | +| `kyuubi.connection.total` | | counter | 1.2.0 |
      cumulative connection count
      | +| `kyuubi.connection.total` | `${sessionType}` | counter | 1.7.0 |
      cumulative connection count with session type `${sessionType}`
      | +| `kyuubi.connection.opened` | | gauge | 1.2.0 |
      current active connection count
      | +| `kyuubi.connection.opened` | `${user}` | counter | 1.2.0 |
      current active connections count requested by a `${user}`
      | +| `kyuubi.connection.opened` | `${user}`
      `${sessionType}` | counter | 1.7.0 |
      current active connections count requested by a `${user}` with session type `${sessionType}`
      | +| `kyuubi.connection.opened` | `${sessionType}` | counter | 1.7.0 |
      current active connections count with session type `${sessionType}`
      | +| `kyuubi.connection.failed` | | counter | 1.2.0 |
      cumulative failed connection count
      | +| `kyuubi.connection.failed` | `${user}` | counter | 1.2.0 |
      cumulative failed connections for a `${user}`
      | +| `kyuubi.connection.failed` | `${sessionType}` | counter | 1.7.0 |
      cumulative failed connection count with session type `${sessionType}`
      | +| `kyuubi.operation.total` | | counter | 1.5.0 |
      cumulative opened operation count
      | +| `kyuubi.operation.total` | `${operationType}` | counter | 1.5.0 |
      cumulative opened count for the operation `${operationType}`
      | +| `kyuubi.operation.opened` | | gauge | 1.5.0 |
      current opened operation count
      | +| `kyuubi.operation.opened` | `${operationType}` | counter | 1.5.0 |
      current opened count for the operation `${operationType}`
      | +| `kyuubi.operation.failed` | `${operationType}`
      `.${errorType}` | counter | 1.5.0 |
      cumulative failed count for the operation `${operationType}` with a particular `${errorType}`, e.g. `execute_statement.AnalysisException`
      | +| `kyuubi.operation.state` | `${operationState}` | meter | 1.5.0 |
      kyuubi operation state rate
      | +| `kyuubi.operation.exec_time` | `${operationType}` | histogram | 1.7.0 |
      execution time histogram for the operation `${operationType}`, now only `ExecuteStatement` is enabled.
      | +| `kyuubi.engine.total` | | counter | 1.2.0 |
      cumulative created engines
      | +| `kyuubi.engine.timeout` | | counter | 1.2.0 |
      cumulative timeout engines
      | +| `kyuubi.engine.failed` | `${user}` | counter | 1.2.0 |
      cumulative explicitly failed engine count for a `${user}`
      | +| `kyuubi.engine.failed` | `${errorType}` | counter | 1.2.0 |
      cumulative explicitly failed engine count for a particular `${errorType}`, e.g. `ClassNotFoundException`
      | +| `kyuubi.backend_service.open_session` | | timer | 1.5.0 |
      kyuubi backend service `openSession` method execution time and rate
      | +| `kyuubi.backend_service.close_session` | | timer | 1.5.0 |
      kyuubi backend service `closeSession` method execution time and rate
      | +| `kyuubi.backend_service.get_info` | | timer | 1.5.0 |
      kyuubi backend service `getInfo` method execution time and rate
      | +| `kyuubi.backend_service.execute_statement` | | timer | 1.5.0 |
      kyuubi backend service `executeStatement` method execution time and rate
      | +| `kyuubi.backend_service.get_type_info` | | timer | 1.5.0 |
      kyuubi backend service `getTypeInfo` method execution time and rate
      | +| `kyuubi.backend_service.get_catalogs` | | timer | 1.5.0 |
      kyuubi backend service `getCatalogs` method execution time and rate
      | +| `kyuubi.backend_service.get_schemas` | | timer | 1.5.0 |
      kyuubi backend service `getSchemas` method execution time and rate
      | +| `kyuubi.backend_service.get_tables` | | timer | 1.5.0 |
      kyuubi backend service `getTables` method execution time and rate
      | +| `kyuubi.backend_service.get_table_types` | | timer | 1.5.0 |
      kyuubi backend service `getTableTypes` method execution time and rate
      | +| `kyuubi.backend_service.get_columns` | | timer | 1.5.0 |
      kyuubi backend service `getColumns` method execution time and rate
      | +| `kyuubi.backend_service.get_functions` | | timer | 1.5.0 |
      kyuubi backend service `getFunctions` method execution time and rate
      | +| `kyuubi.backend_service.get_operation_status` | | timer | 1.5.0 |
      kyuubi backend service `getOperationStatus` method execution time and rate
      | +| `kyuubi.backend_service.cancel_operation` | | timer | 1.5.0 |
      kyuubi backend service `cancelOperation` method execution time and rate
      | +| `kyuubi.backend_service.close_operation` | | timer | 1.5.0 |
      kyuubi backend service `closeOperation` method execution time and rate
      | +| `kyuubi.backend_service.get_result_set_metadata` | | timer | 1.5.0 |
      kyuubi backend service `getResultSetMetadata` method execution time and rate
      | +| `kyuubi.backend_service.fetch_results` | | timer | 1.5.0 |
      kyuubi backend service `fetchResults` method execution time and rate
      | +| `kyuubi.backend_service.fetch_log_rows_rate` | | meter | 1.5.0 |
      kyuubi backend service `fetchResults` method that fetch log rows rate
      | +| `kyuubi.backend_service.fetch_result_rows_rate` | | meter | 1.5.0 |
      kyuubi backend service `fetchResults` method that fetch result rows rate
      | +| `kyuubi.backend_service.get_primary_keys` | | meter | 1.6.0 |
      kyuubi backend service `get_primary_keys` method execution time and rate
      | +| `kyuubi.backend_service.get_cross_reference` | | meter | 1.6.0 |
      kyuubi backend service `get_cross_reference` method execution time and rate
      | +| `kyuubi.operation.state` | `${operationType}`
      `.${state}` | meter | 1.6.0 |
      The `${operationType}` with a particular `${state}` rate, e.g. `BatchJobSubmission.pending`, `BatchJobSubmission.finished`. Note that, the terminal states are cumulative, but the intermediate ones are not.
      | +| `kyuubi.metadata.request.opened` | | counter | 1.6.1 |
      current opened count for the metadata requests
      | +| `kyuubi.metadata.request.total` | | meter | 1.6.0 |
      metadata requests time and rate
      | +| `kyuubi.metadata.request.failed` | | meter | 1.6.0 |
      metadata requests failure time and rate
      | +| `kyuubi.metadata.request.retrying` | | meter | 1.6.0 |
      retrying metadata requests time and rate, it is not cumulative
      | Before v1.5.0, if you use these metrics: - `kyuubi.statement.total` diff --git a/docs/monitor/trouble_shooting.md b/docs/monitor/trouble_shooting.md index b7abc30261d..e6ba5ea1a6b 100644 --- a/docs/monitor/trouble_shooting.md +++ b/docs/monitor/trouble_shooting.md @@ -1,25 +1,26 @@ - +- Licensed to the Apache Software Foundation (ASF) under one or more +- contributor license agreements. See the NOTICE file distributed with +- this work for additional information regarding copyright ownership. +- The ASF licenses this file to You under the Apache License, Version 2.0 +- (the "License"); you may not use this file except in compliance with +- the License. You may obtain a copy of the License at +- +- http://www.apache.org/licenses/LICENSE-2.0 +- +- Unless required by applicable law or agreed to in writing, software +- distributed under the License is distributed on an "AS IS" BASIS, +- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- See the License for the specific language governing permissions and +- limitations under the License. +--> # Trouble Shooting ## Common Issues + ### java.lang.UnsupportedClassVersionError .. Unsupported major.minor version 52.0 + ``` Exception in thread "main" java.lang.UnsupportedClassVersionError: org/apache/kyuubi/server/KyuubiServer : Unsupported major.minor version 52.0 at java.lang.ClassLoader.defineClass1(Native Method) @@ -87,10 +88,8 @@ To fix this problem you should export `HADOOP_CONF_DIR` to the folder that conta echo "export HADOOP_CONF_DIR=/path/to/hadoop/conf" >> conf/kyuubi-env.sh ``` - ### javax.security.sasl.SaslException: GSS initiate failed [Caused by GSSException: No valid credentials provided (Mechanism level: Failed to find any Kerberos tgt)]; - ### org.apache.hadoop.security.AccessControlException: Permission denied: user=hzyanqin, access=WRITE, inode="/user":hdfs:hdfs:drwxr-xr-x ``` @@ -167,7 +166,6 @@ The user do not have permission to create to Hadoop home dir, which is `/user/hz To fix this problem you need to create this directory first and grant ACL permission for `hzyanqin`. - ### org.apache.thrift.TApplicationException: Invalid method name: 'get_table_req' ``` @@ -198,7 +196,6 @@ This error means that you are using incompatible version of Hive metastore clien To fix this problem you could use a compatible version of Hive client by configuring `spark.sql.hive.metastore.jars` and `spark.sql.hive.metastore.version` at Spark side. - ### hive.server2.thrift.max.worker.threads ``` @@ -209,6 +206,7 @@ Error: org.apache.thrift.transport.TTransportException (state=08S01,code=0) In Kyuubi, we should increase `kyuubi.frontend.min.worker.threads` instead of `hive.server2.thrift.max.worker.threads` ### Failed to create function using jar + `CREATE TEMPORARY FUNCTION TEST AS 'com.netease.UDFTest' using jar 'hdfs:///tmp/udf.jar'` ``` @@ -248,7 +246,9 @@ If you get this exception when creating a function, you can check your JDK versi You should update JDK to JDK1.8.0_121 and later, since JDK1.8.0_121 fix a security issue [Additional access restrictions for URLClassLoader.newInstance](https://www.oracle.com/java/technologies/javase/8u121-relnotes.html). ### Failed to start Spark 3.1 with error msg 'Cannot modify the value of a Spark config' + Here is the error message + ``` Caused by: org.apache.spark.sql.AnalysisException: Cannot modify the value of a Spark config: spark.yarn.queue at org.apache.spark.sql.RuntimeConfig.requireNonStaticConf(RuntimeConfig.scala:156) diff --git a/docs/overview/architecture.md b/docs/overview/architecture.md index d3dc7030a0a..4df5e24a4ab 100644 --- a/docs/overview/architecture.md +++ b/docs/overview/architecture.md @@ -1,20 +1,19 @@ - +- Licensed to the Apache Software Foundation (ASF) under one or more +- contributor license agreements. See the NOTICE file distributed with +- this work for additional information regarding copyright ownership. +- The ASF licenses this file to You under the Apache License, Version 2.0 +- (the "License"); you may not use this file except in compliance with +- the License. You may obtain a copy of the License at +- +- http://www.apache.org/licenses/LICENSE-2.0 +- +- Unless required by applicable law or agreed to in writing, software +- distributed under the License is distributed on an "AS IS" BASIS, +- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- See the License for the specific language governing permissions and +- limitations under the License. +--> # Kyuubi Architecture @@ -108,7 +107,7 @@ and these applications can be placed in different shared domains for other conne Kyuubi does not occupy any resources from the Cluster Manager(e.g. Yarn) during startup and will give all resources back if there is not any active session interacting with a `SparkContext`. -Spark also provides [Dynamic Resource Allocation](http://spark.apache.org/docs/latest/job-scheduling.html#dynamic-resource-allocation) to dynamically adjust the resources your application occupies based on the workload. It means +Spark also provides [Dynamic Resource Allocation](https://spark.apache.org/docs/latest/job-scheduling.html#dynamic-resource-allocation) to dynamically adjust the resources your application occupies based on the workload. It means that your application may give resources back to the cluster if they are no longer used and request them again later when there is demand. This feature is handy if multiple applications share resources in your Spark cluster. @@ -133,7 +132,6 @@ On the one hand, because tom enables Spark's dynamic resource request feature, Spark will efficiently request and recycle executors within the program based on the SQL operations scale and the available resources in the queue. On the other hand, when Kyuubi finds that the application has been idle for too long, it will also recycle its application. - ## High Availability & Load Balance For an enterprise service, the Service Level Agreement(SLA) commitment must be very high. @@ -174,5 +172,5 @@ We also create a [Submarine: Spark Security](https://mvnrepository.com/artifact/ ## Conclusions -Kyuubi is a unified multi-tenant JDBC interface for large-scale data processing and analytics, built on top of [Apache Spark™](http://spark.apache.org/). +Kyuubi is a unified multi-tenant JDBC interface for large-scale data processing and analytics, built on top of [Apache Spark™](https://spark.apache.org/). It extends the Spark Thrift Server's scenarios in enterprise applications, the most important of which is multi-tenancy support. diff --git a/docs/overview/kyuubi_vs_hive.md b/docs/overview/kyuubi_vs_hive.md index 40ee9136b66..80038c17864 100644 --- a/docs/overview/kyuubi_vs_hive.md +++ b/docs/overview/kyuubi_vs_hive.md @@ -1,23 +1,22 @@ +- Licensed to the Apache Software Foundation (ASF) under one or more +- contributor license agreements. See the NOTICE file distributed with +- this work for additional information regarding copyright ownership. +- The ASF licenses this file to You under the Apache License, Version 2.0 +- (the "License"); you may not use this file except in compliance with +- the License. You may obtain a copy of the License at +- +- http://www.apache.org/licenses/LICENSE-2.0 +- +- Unless required by applicable law or agreed to in writing, software +- distributed under the License is distributed on an "AS IS" BASIS, +- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- See the License for the specific language governing permissions and +- limitations under the License. +--> # Kyuubi v.s. HiveServer2 - ## Introduction HiveServer2 is a service that enables clients to execute Hive QL queries on Hive supporting multi-client concurrency and authentication. @@ -25,29 +24,26 @@ Kyuubi enables clients to execute Spark SQL queries directly on Spark supporting They are both designed to provide better support for open API clients like JDBC and ODBC to manage and analyze BigData. - ## Hive on Spark The purpose of Hive on Spark is to add Spark as a third execution backend, parallel to MR and Tez. Comparing to Hive on MR, it's use the Spark DAG will help improve the performance of Hive queries, especially those have multiple reducer stages. - - - ## Differences Between Kyuubi and HiveServer2 -- | Kyuubi | HiveServer2 | ---- | --- | --- -** Language ** | Spark SQL | Hive QL -** Optimizer ** | Spark SQL Catalyst | Hive Optimizer -** Engine ** | up to Spark 3.x | MapReduce/[up to Spark 2.3](https://cwiki.apache.org/confluence/display/Hive/Hive+on+Spark%3A+Getting+Started#HiveonSpark:GettingStarted-VersionCompatibility)/Tez -** Performance ** | High | Low -** Compatibility with Spark ** | Good | Bad(need to rebuild on a specific version) -** Data Types ** | [Spark Data Types](http://spark.apache.org/docs/latest/sql-ref-datatypes.html) | [Hive Data Types](https://cwiki.apache.org/confluence/display/Hive/LanguageManual+Types) - +| | Kyuubi | HiveServer2 | +|------------------------------|---------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| **Language** | Spark SQL | Hive QL | +| **Optimizer** | Spark SQL Catalyst | Hive Optimizer | +| **Engine** | up to Spark 3.x | MapReduce/[up to Spark 2.3](https://cwiki.apache.org/confluence/display/Hive/Hive+on+Spark%3A+Getting+Started#HiveonSpark:GettingStarted-VersionCompatibility)/Tez | +| **Performance** | High | Low | +| **Compatibility with Spark** | Good | Bad(need to rebuild on a specific version) | +| **Data Types** | [Spark Data Types](https://spark.apache.org/docs/latest/sql-ref-datatypes.html) | [Hive Data Types](https://cwiki.apache.org/confluence/display/Hive/LanguageManual+Types) | ## Performance + ## References 1. [HiveServer2 Overview](https://cwiki.apache.org/confluence/display/Hive/HiveServer2+Overview) + diff --git a/docs/overview/kyuubi_vs_thriftserver.md b/docs/overview/kyuubi_vs_thriftserver.md index 9aeb5962b11..66f900c7441 100644 --- a/docs/overview/kyuubi_vs_thriftserver.md +++ b/docs/overview/kyuubi_vs_thriftserver.md @@ -1,28 +1,27 @@ - +- Licensed to the Apache Software Foundation (ASF) under one or more +- contributor license agreements. See the NOTICE file distributed with +- this work for additional information regarding copyright ownership. +- The ASF licenses this file to You under the Apache License, Version 2.0 +- (the "License"); you may not use this file except in compliance with +- the License. You may obtain a copy of the License at +- +- http://www.apache.org/licenses/LICENSE-2.0 +- +- Unless required by applicable law or agreed to in writing, software +- distributed under the License is distributed on an "AS IS" BASIS, +- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- See the License for the specific language governing permissions and +- limitations under the License. +--> # Kyuubi v.s. Spark Thrift JDBC/ODBC Server (STS) ## Introductions -The Apache Spark [Thrift JDBC/ODBC Server](http://spark.apache.org/docs/latest/sql-distributed-sql-engine.html) is a Thrift service implemented by the Apache Spark community based on HiveServer2. +The Apache Spark [Thrift JDBC/ODBC Server](https://spark.apache.org/docs/latest/sql-distributed-sql-engine.html) is a Thrift service implemented by the Apache Spark community based on HiveServer2. Designed to be seamlessly compatible with HiveServer2, it provides Spark SQL capabilities to end-users in a pure SQL way through a JDBC interface. -This "out-of-the-box" model minimizes the barriers and costs for users to use Spark. +This "out-of-the-box" model minimizes the barriers and costs for users to use Spark. Kyuubi and Spark are aligned in this goal. On top of that, Kyuubi has made enhancements in multi-tenant support, service availability, service concurrency capability, data security, and other aspects. @@ -48,7 +47,7 @@ If they use too many resources, will it affect other critical tasks? Otherwise, will the cluster's resources be idle and wasted? It is also hard for users to set up thousands of Spark configurations properly. Key features like [Dynamic Resource Allocation](../deployment/spark/dynamic_allocation.md), Speculation might be hard to benefit all with a one-time setup. -And new features like [Adaptive Query Execution](../deployment/spark/aqe.md) could come a long way from the first release involved of Spark to finally get applied to end-users. +And new features like [Adaptive Query Execution](../deployment/spark/aqe.md) could come a long way from the first release involved of Spark to finally get applied to end-users. #### Insecurity @@ -98,7 +97,6 @@ The server-side upgrade will not cause interface incompatibility. As for the potential SQL compatibility problem in Spark version upgrade, it also exists when not using Spark ThriftServer, and is more challenging to solve. Moreover, in Spark ThriftServer mode, the server-side can do the full amount of SQL collection in advance, and the verification can be done before the upgrade. - ## Limitations of Spark ThriftServer As we can see from the basic architecture of Spark ThriftServer above, it is essentially a single Spark application, and there are generally significant limitations to responding to thousands of client requests. @@ -119,7 +117,7 @@ With Fair Scheduler Pools, Spark ThriftServer has the ability of resource isolat It will send queries to a high-weight pool to get more executors for execution. In essence, resource isolation such as CPU/memory/IO should be something that resource managers like YARN and Kubernetes should do. Doing logical isolation at the computing layer is unlikely to work well, and this problem exists in the Apache Impala project as well, for example. -And it is difficult to avoid the problem of HMS, HDFS single point access, especially in the scenario of reading and writing dynamic partition tables or handling queries with numerous `Union`s. +And it is difficult to avoid the problem of HMS, HDFS single point access, especially in the scenario of reading and writing dynamic partition tables or handling queries with numerous `Union`s. ### Multi-tenancy limitations @@ -162,7 +160,6 @@ Besides, since UDFs are loaded directly into the Spark ThriftServer, if they con The HiveServer2 is also introduced here for a more comprehensive comparison. - || HiveServer2
      (Hive on Spark) | Spark ThriftServer | Kyuubi | |--|--|--|--| |**Interface** | HiveJDBC | HiveJDBC | HiveJDBC | @@ -183,16 +180,15 @@ The HiveServer2 is also introduced here for a more comprehensive comparison. |**Compute
      Resource
      Management** | YARN |pools| YARN, Kubernetes, etc. | |**Resource
      Occupancy
      Time** | within a query | Permanent | Using Kyuubi Engine to request and
      release resources
      1. For `CONNECTION` level isolation, an Engine terminates when a JDBC connection disconnects
      2. For other modes, an Engine timeouts after all connections disconnect.
      3. All isolation modes support [DRA](../deployment/spark/dynamic_allocation.md)
      | -### Consistent Interfaces +### Consistent Interfaces Kyuubi, Spark Thrift Server, and HiveServer2 are identical in terms of interfaces and protocols. Therefore, from the user's point of view, the way of use is unchanged. -Compared with HiveServer2, the most significant advantage of the first two should be the performance improvement. +Compared with HiveServer2, the most significant advantage of the first two should be the performance improvement. From the perspective of SQL syntax compatibility, Kyuubi and Spark Thrift Server are fully compatible with Spark SQL as they are completely delegated to the Spark SQL Catalyst layer. Spark SQL also fully supports Hive QL collections, with only a few enumerable SQL behaviors and syntax differences. - ### Multi-tenant Architecture `From wikipedia`: The term "software multitenancy" refers to a software architecture in which a single instance of the software runs on a server and serves multiple tenants. Systems designed in such a manner are often called shared (in contrast to dedicated or isolated). diff --git a/docs/quick_start/quick_start.rst b/docs/quick_start/quick_start.rst index ca73fba35f3..db564edb92c 100644 --- a/docs/quick_start/quick_start.rst +++ b/docs/quick_start/quick_start.rst @@ -143,7 +143,7 @@ To install Spark, you need to unpack the tarball. For example, .. code-block:: - $ tar zxf spark-3.3.1-bin-hadoop3.tgz + $ tar zxf spark-3.3.2-bin-hadoop3.tgz Configuration ~~~~~~~~~~~~~ diff --git a/docs/quick_start/quick_start_with_helm.md b/docs/quick_start/quick_start_with_helm.md index 4b3c85ba7d7..a2de5444560 100644 --- a/docs/quick_start/quick_start_with_helm.md +++ b/docs/quick_start/quick_start_with_helm.md @@ -1,106 +1,120 @@ +- Licensed to the Apache Software Foundation (ASF) under one or more +- contributor license agreements. See the NOTICE file distributed with +- this work for additional information regarding copyright ownership. +- The ASF licenses this file to You under the Apache License, Version 2.0 +- (the "License"); you may not use this file except in compliance with +- the License. You may obtain a copy of the License at +- +- http://www.apache.org/licenses/LICENSE-2.0 +- +- Unless required by applicable law or agreed to in writing, software +- distributed under the License is distributed on an "AS IS" BASIS, +- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- See the License for the specific language governing permissions and +- limitations under the License. +--> +# Getting Started With Kyuubi on Kubernetes -# Getting Started With Kyuubi on kubernetes +## Running Kyuubi with Helm -## Running kyuubi with helm +[Helm](https://helm.sh/) is the package manager for Kubernetes, it can be used to find, share, and use software built for Kubernetes. -[Helm](https://helm.sh/) is the package manager for Kubernetes,it can be used to find, share, and use software built for Kubernetes. +### Install Helm -### Get helm and Install - -Please go to [Install Helm](https://helm.sh/docs/intro/install/) page to get and install an appropriate release version for yourself. +Please go to [Installing Helm](https://helm.sh/docs/intro/install/) page to get and install an appropriate release version for yourself. ### Get Kyuubi Started -#### [Optional] Create namespace on kubernetes -```bash -create ns kyuubi -``` +#### Install the chart -#### Get kyuubi started -```bash -helm install kyuubi-helm ${KYUUBI_HOME}/charts/kyuubi -n ${namespace_name} +```shell +helm install kyuubi ${KYUUBI_HOME}/charts/kyuubi -n kyuubi --create-namespace ``` -It will print variables and the way to get kyuubi expose ip and port. -```bash -NAME: kyuubi-helm -LAST DEPLOYED: Wed Oct 20 15:22:47 2021 + +It will print release info with notes, including the ways to get Kyuubi accessed within Kubernetes cluster and exposed externally depending on the configuration provided. + +```shell +NAME: kyuubi +LAST DEPLOYED: Sat Feb 11 20:59:00 2023 NAMESPACE: kyuubi STATUS: deployed REVISION: 1 TEST SUITE: None NOTES: -Get kyuubi expose URL by running these commands: - export NODE_PORT=$(kubectl get --namespace kyuubi -o jsonpath="{.spec.ports[0].nodePort}" services kyuubi-svc) - export NODE_IP=$(kubectl get nodes --namespace kyuubi -o jsonpath="{.items[0].status.addresses[0].address}") - echo $NODE_IP:$NODE_PORT +The chart has been installed! + +In order to check the release status, use: + helm status kyuubi -n kyuubi + or for more detailed info + helm get all kyuubi -n kyuubi + +************************ +******* Services ******* +************************ +THRIFT_BINARY: +- To access kyuubi-thrift-binary service within the cluster, use the following URL: + kyuubi-thrift-binary.kyuubi.svc.cluster.local +- To access kyuubi-thrift-binary service from outside the cluster for debugging, run the following command: + kubectl port-forward svc/kyuubi-thrift-binary 10009:10009 -n kyuubi + and use 127.0.0.1:10009 ``` -#### Using hive beeline -[Using Hive Beeline](./quick_start.html#using-hive-beeline) to opening a connection. +#### Uninstall the chart -#### Remove kyuubi -```bash -helm uninstall kyuubi-helm -n ${namespace_name} +```shell +helm uninstall kyuubi -n kyuubi ``` -#### Edit server config +#### Configure chart release + +Specify configuration properties using `--set` flag. +For example, to install the chart with `replicaCount` set to `1`, use the following command: + +```shell +helm install kyuubi ${KYUUBI_HOME}/charts/kyuubi -n kyuubi --create-namespace --set replicaCount=1 +``` + +Also, custom values file can be used to override default property values. For example, create `myvalues.yaml` to specify `replicaCount` and `resources`: -Modify `values.yaml` under `${KYUUBI_HOME}/docker/helm`: ```yaml -# Kyuubi server numbers -replicaCount: 2 - -image: - repository: apache/kyuubi - pullPolicy: Always - # Overrides the image tag whose default is the chart appVersion. - tag: "master-snapshot" - -server: - bind: - host: 0.0.0.0 - port: 10009 - conf: - mountPath: /opt/kyuubi/conf - -service: - type: NodePort - # The default port limit of kubernetes is 30000-32767 - # to change: - # vim kube-apiserver.yaml (usually under path: /etc/kubernetes/manifests/) - # add or change line 'service-node-port-range=1-32767' under kube-apiserver - port: 30009 +replicaCount: 1 + +resources: + requests: + cpu: 2 + memory: 4Gi + limits: + cpu: 4 + memory: 10Gi +``` + +and use it to override default chart values with `-f` flag: + +```shell +helm install kyuubi ${KYUUBI_HOME}/charts/kyuubi -n kyuubi --create-namespace -f myvalues.yaml ``` -#### Get server log -List all server pods: -```bash -kubectl get po -n ${namespace_name} +#### Access logs + +List all pods in the release namespace: + +```shell +kubectl get pod -n kyuubi ``` -The server pods will print: -```text -NAME READY STATUS RESTARTS AGE -kyuubi-server-585d8944c5-m7j5s 1/1 Running 0 30m -kyuubi-server-32sdsa1245-2d2sj 1/1 Running 0 30m + +Find Kyuubi pods: + +```shell +NAME READY STATUS RESTARTS AGE +kyuubi-5b6d496c98-kbhws 1/1 Running 0 38m +kyuubi-5b6d496c98-lqldk 1/1 Running 0 38m ``` -then, use pod name to get logs: -```bash -kubectl -n ${namespace_name} logs kyuubi-server-585d8944c5-m7j5s + +Then, use pod name to get logs: + +```shell +kubectl logs kyuubi-5b6d496c98-kbhws -n kyuubi ``` + diff --git a/docs/quick_start/quick_start_with_jdbc.md b/docs/quick_start/quick_start_with_jdbc.md index e305530f1b9..c22cc1b65c1 100644 --- a/docs/quick_start/quick_start_with_jdbc.md +++ b/docs/quick_start/quick_start_with_jdbc.md @@ -1,24 +1,24 @@ - +- Licensed to the Apache Software Foundation (ASF) under one or more +- contributor license agreements. See the NOTICE file distributed with +- this work for additional information regarding copyright ownership. +- The ASF licenses this file to You under the Apache License, Version 2.0 +- (the "License"); you may not use this file except in compliance with +- the License. You may obtain a copy of the License at +- +- http://www.apache.org/licenses/LICENSE-2.0 +- +- Unless required by applicable law or agreed to in writing, software +- distributed under the License is distributed on an "AS IS" BASIS, +- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- See the License for the specific language governing permissions and +- limitations under the License. +--> # Getting Started With Hive JDBC ## How to install JDBC driver + Kyuubi JDBC driver is fully compatible with the 2.3.* version of hive JDBC driver, so we reuse hive JDBC driver to connect to Kyuubi server. Add repository to your maven configuration file which may reside in `$MAVEN_HOME/conf/settings.xml`. @@ -32,6 +32,7 @@ Add repository to your maven configuration file which may reside in `$MAVEN_HOME ``` + You can add below dependency to your `pom.xml` file in your application. ```xml @@ -50,6 +51,7 @@ You can add below dependency to your `pom.xml` file in your application. ``` ## Use JDBC driver with kerberos + The below java code is using a keytab file to login and connect to Kyuubi server by JDBC. ```java @@ -91,3 +93,4 @@ public class JDBCTest { } } ``` + diff --git a/docs/quick_start/quick_start_with_jupyter.md b/docs/quick_start/quick_start_with_jupyter.md index 9a651d45b0a..44b3faa5786 100644 --- a/docs/quick_start/quick_start_with_jupyter.md +++ b/docs/quick_start/quick_start_with_jupyter.md @@ -1,20 +1,19 @@ - +- Licensed to the Apache Software Foundation (ASF) under one or more +- contributor license agreements. See the NOTICE file distributed with +- this work for additional information regarding copyright ownership. +- The ASF licenses this file to You under the Apache License, Version 2.0 +- (the "License"); you may not use this file except in compliance with +- the License. You may obtain a copy of the License at +- +- http://www.apache.org/licenses/LICENSE-2.0 +- +- Unless required by applicable law or agreed to in writing, software +- distributed under the License is distributed on an "AS IS" BASIS, +- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- See the License for the specific language governing permissions and +- limitations under the License. +--> # Getting Started With Hive Jupyter Lap diff --git a/docs/requirements.txt b/docs/requirements.txt index 8a5ee7e128b..ecc8116e77d 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -17,11 +17,10 @@ # under the License. # -# we shall bypass markdown-3.4.1, see details in KYUUBI-3126 -markdown==3.3.7 +markdown==3.4.1 recommonmark==0.7.1 sphinx==4.5.0 sphinx-book-theme==0.3.3 -sphinx-markdown-tables==0.0.15 -sphinx-notfound-page==0.8 +sphinx-markdown-tables==0.0.17 +sphinx-notfound-page==0.8.3 sphinx-togglebutton===0.3.2 diff --git a/docs/security/authorization/spark/build.md b/docs/security/authorization/spark/build.md index bef011867ad..3886f08dfa3 100644 --- a/docs/security/authorization/spark/build.md +++ b/docs/security/authorization/spark/build.md @@ -1,28 +1,25 @@ - - +- Licensed to the Apache Software Foundation (ASF) under one or more +- contributor license agreements. See the NOTICE file distributed with +- this work for additional information regarding copyright ownership. +- The ASF licenses this file to You under the Apache License, Version 2.0 +- (the "License"); you may not use this file except in compliance with +- the License. You may obtain a copy of the License at +- +- http://www.apache.org/licenses/LICENSE-2.0 +- +- Unless required by applicable law or agreed to in writing, software +- distributed under the License is distributed on an "AS IS" BASIS, +- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- See the License for the specific language governing permissions and +- limitations under the License. +--> # Building Kyuubi Spark AuthZ Plugin - ## Build with Apache Maven -Kyuubi Spark AuthZ Plugin is built using [Apache Maven](http://maven.apache.org). +Kyuubi Spark AuthZ Plugin is built using [Apache Maven](https://maven.apache.org). To build it, `cd` to the root direct of kyuubi project and run: ```shell @@ -48,14 +45,14 @@ build/mvn clean package -pl :kyuubi-spark-authz_2.12 -DskipTests -Dspark.version The available `spark.version`s are shown in the following table. -| Spark Version | Supported | Remark | -|:-----------------:|:-----------:|:----------------------------------------------------------------------------------------------------------------------:| -| master | √ | - | -| 3.3.x | √ | - | -| 3.2.x | √ | - | -| 3.1.x | √ | - | -| 3.0.x | √ | - | -| 2.4.x and earlier | × | [PR 2367](https://github.com/apache/kyuubi/pull/2367) is used to track how we work with older releases with scala 2.11 | +| Spark Version | Supported | Remark | +|:-----------------:|:---------:|:----------------------------------------------------------------------------------------------------------------------:| +| master | √ | - | +| 3.3.x | √ | - | +| 3.2.x | √ | - | +| 3.1.x | √ | - | +| 3.0.x | √ | - | +| 2.4.x and earlier | × | [PR 2367](https://github.com/apache/kyuubi/pull/2367) is used to track how we work with older releases with scala 2.11 | Currently, Spark released with Scala 2.12 are supported. @@ -71,21 +68,22 @@ build/mvn clean package -pl :kyuubi-spark-authz_2.12 -DskipTests -Dranger.versio The available `ranger.version`s are shown in the following table. -| Ranger Version | Supported | Remark | -|:--------------:|:-----------:|:------:| -| 2.3.x | √ | - | -| 2.2.x | √ | - | -| 2.1.x | √ | - | -| 2.0.x | √ | - | -| 1.2.x | √ | - | -| 1.1.x | √ | - | -| 1.0.x | √ | - | -| 0.7.x | √ | - | -| 0.6.x | √ | - | +| Ranger Version | Supported | Remark | +|:--------------:|:---------:|:------:| +| 2.3.x | √ | - | +| 2.2.x | √ | - | +| 2.1.x | √ | - | +| 2.0.x | √ | - | +| 1.2.x | √ | - | +| 1.1.x | √ | - | +| 1.0.x | √ | - | +| 0.7.x | √ | - | +| 0.6.x | √ | - | Currently, all ranger releases are supported. ## Test with ScalaTest Maven plugin + If you omit `-DskipTests` option in the command above, you will also get all unit tests run. ```shell diff --git a/docs/security/authorization/spark/install.md b/docs/security/authorization/spark/install.md index 1d77d15b5eb..f820f53c4ec 100644 --- a/docs/security/authorization/spark/install.md +++ b/docs/security/authorization/spark/install.md @@ -1,21 +1,19 @@ - - +- Licensed to the Apache Software Foundation (ASF) under one or more +- contributor license agreements. See the NOTICE file distributed with +- this work for additional information regarding copyright ownership. +- The ASF licenses this file to You under the Apache License, Version 2.0 +- (the "License"); you may not use this file except in compliance with +- the License. You may obtain a copy of the License at +- +- http://www.apache.org/licenses/LICENSE-2.0 +- +- Unless required by applicable law or agreed to in writing, software +- distributed under the License is distributed on an "AS IS" BASIS, +- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- See the License for the specific language governing permissions and +- limitations under the License. +--> # Installing and Configuring Kyuubi Spark AuthZ Plugin @@ -42,8 +40,9 @@ With the `kyuubi-spark-authz_*.jar` and its transitive dependencies available fo ### Settings for Connecting Ranger Admin #### ranger-spark-security.xml + - Create `ranger-spark-security.xml` in `$SPARK_HOME/conf` and add the following configurations -for pointing to the right Ranger admin server. + for pointing to the right Ranger admin server. ```xml @@ -74,10 +73,11 @@ for pointing to the right Ranger admin server. ``` + ##### Using Macros in Row Level Filters Macros are now supported for using user/group/tag in row filter expressions, introduced in [Ranger 2.3](https://cwiki.apache.org/confluence/display/RANGER/Apache+Ranger+2.3.0+-+Release+Notes). This feature helps significantly simplify row filter expressions by using user/group/tag's attributes instead of explicit conditions. Considering a user with an attribute `born_city` of value `Guangzhou `, the row filter condition as `city='${{USER.born_city}}'` will be transformed to `city='Guangzhou'` in execution plan. More supported macros and usage refer to [RANGER-3605](https://issues.apache.org/jira/browse/RANGER-3605) and [RANGER-3550](https://issues.apache.org/jira/browse/RANGER-3550). Add the following configs to `ranger-spark-security.xml` to enable UserStore Enricher required by macros. - + ```xml ranger.plugin.spark.enable.implicit.userstore.enricher @@ -93,13 +93,15 @@ Macros are now supported for using user/group/tag in row filter expressions, int ``` ##### Showing all disallowed privileges + By default, Authz plugin checks required privileges one by one and throw the first unsatisfied privilege in exception. By setting `ranger.plugin.spark.authorize.in.single.call` to `true`, Authz plugin executes access checks in single call and throws all disallowed privileges in exception message. + ```xml - - ranger.plugin.spark.authorize.in.single.call - true - Enable access checks in single call with all disallowed privileges thrown in exception. Default value is false. - + + ranger.plugin.spark.authorize.in.single.call + true + Enable access checks in single call with all disallowed privileges thrown in exception. Default value is false. + ``` #### ranger-spark-audit.xml @@ -150,3 +152,4 @@ Add `org.apache.kyuubi.plugin.spark.authz.ranger.RangerSparkExtension` to the sp ```properties spark.sql.extensions=org.apache.kyuubi.plugin.spark.authz.ranger.RangerSparkExtension ``` + diff --git a/docs/security/hadoop_credentials_manager.md b/docs/security/hadoop_credentials_manager.md index 087d2c68b0c..baed91c8e3a 100644 --- a/docs/security/hadoop_credentials_manager.md +++ b/docs/security/hadoop_credentials_manager.md @@ -1,85 +1,90 @@ - +- Licensed to the Apache Software Foundation (ASF) under one or more +- contributor license agreements. See the NOTICE file distributed with +- this work for additional information regarding copyright ownership. +- The ASF licenses this file to You under the Apache License, Version 2.0 +- (the "License"); you may not use this file except in compliance with +- the License. You may obtain a copy of the License at +- +- http://www.apache.org/licenses/LICENSE-2.0 +- +- Unless required by applicable law or agreed to in writing, software +- distributed under the License is distributed on an "AS IS" BASIS, +- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- See the License for the specific language governing permissions and +- limitations under the License. +--> # Hadoop Credentials Manager -In order to pass the authentication of a kerberos secured hadoop cluster, kyuubi currently submits +In order to pass the authentication of a kerberos secured hadoop cluster, kyuubi currently submits engines in two ways: 1. Submits with current kerberos user and extra `SparkSubmit` argument `--proxy-user`. 2. Submits with `spark.kerberos.principal` and `spark.kerberos.keytab` specified. -If engine is submitted with `--proxy-user` specified, its delegation tokens of hadoop cluster +If engine is submitted with `--proxy-user` specified, its delegation tokens of hadoop cluster services are obtained by current kerberos user and can not be renewed by itself. Thus, engine's lifetime is limited by the lifetime of delegation tokens. To remove this limitation, kyuubi renews delegation tokens at server side in Hadoop Credentials Manager. -Engine submitted with principal and keytab can renew delegation tokens by itself. +Engine submitted with principal and keytab can renew delegation tokens by itself. But for implementation simplicity, kyuubi server will also renew delegation tokens for it. ## Configurations ### Cluster Services + Kyuubi currently supports renew delegation tokens of Hadoop filesystems and Hive metastore servers. #### Hadoop client configurations + Set `HADOOP_CONF_DIR` in `$KYUUBI_HOME/conf/kyuubi-env.sh` if it hasn't been set yet, e.g. ```bash $ echo "export HADOOP_CONF_DIR=/path/to/hadoop/conf" >> $KYUUBI_HOME/conf/kyuubi-env.sh ``` + Extra Hadoop filesystems can be specified in `$KYUUBI_HOME/conf/kyuubi-defaults.conf` by `kyuubi.credentials.hadoopfs.uris` in comma separated list. #### Hive metastore configurations ##### Via kyuubi-defaults.conf -Specify Hive metastore configurations In `$KYUUBI_HOME/conf/kyuubi-defaults.conf`. Hadoop Credentials + +Specify Hive metastore configurations In `$KYUUBI_HOME/conf/kyuubi-defaults.conf`. Hadoop Credentials Manager will load the configurations when initialized. ##### Via hive-site.xml -Place your copy of `hive-site.xml` into `$KYUUBI_HOME/conf`, Kyuubi will load this config file to + +Place your copy of `hive-site.xml` into `$KYUUBI_HOME/conf`, Kyuubi will load this config file to its classpath. This version of configuration has lower priority than those in `$KYUUBI_HOME/conf/kyuubi-defaults.conf`. ##### Via JDBC Connection URL + Hive configurations specified in JDBC connection URL are ignored by Hadoop Credentials Manager as Hadoop Credentials Manager is initialized when Kyuubi server starts. ### Credentials Renewal -Key | Default | Meaning | Type | Since ---- | --- | --- | --- | --- -kyuubi.credentials.hadoopfs.enabled|
      true
      |
      Whether to renew Hadoop filesystem delegation tokens
      |
      boolean
      |
      1.4.0
      -kyuubi.credentials.hadoopfs.uris|
      |
      Extra Hadoop filesystem URIs for which to request delegation tokens. The filesystem that hosts fs.defaultFS does not need to be listed here.
      |
      seq
      |
      1.4.0
      -kyuubi.credentials.hive.enabled|
      true
      |
      Whether to renew Hive metastore delegation token
      |
      boolean
      |
      1.4.0
      -kyuubi.credentials.renewal.interval|
      PT1H
      |
      How often Kyuubi renews one user's delegation tokens
      |
      duration
      |
      1.4.0
      -kyuubi.credentials.renewal.retry.wait|
      PT1M
      |
      How long to wait before retrying to fetch new credentials after a failure.
      |
      duration
      |
      1.4.0
      - +| Key | Default | Meaning | Type | Since | +|----------------------------------------------------|-------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------------------------------|--------------------------------------| +| kyuubi.credentials.hadoopfs.enabled |
      true
      |
      Whether to renew Hadoop filesystem delegation tokens
      |
      boolean
      |
      1.4.0
      | +| kyuubi.credentials.hadoopfs.uris |
      |
      Extra Hadoop filesystem URIs for which to request delegation tokens. The filesystem that hosts fs.defaultFS does not need to be listed here.
      |
      seq
      |
      1.4.0
      | +| kyuubi.credentials.hive.enabled |
      true
      |
      Whether to renew Hive metastore delegation token
      |
      boolean
      |
      1.4.0
      | +| kyuubi.credentials.renewal.interval |
      PT1H
      |
      How often Kyuubi renews one user's delegation tokens
      |
      duration
      |
      1.4.0
      | +| kyuubi.credentials.renewal.retry.wait |
      PT1M
      |
      How long to wait before retrying to fetch new credentials after a failure.
      |
      duration
      |
      1.4.0
      | ### Required Security Configs The necessary configurations for hdfs and hive to obtain delegation token are as follows: -Key | Meaning | value ---- | --- | --- -hadoop.security.authentication|
      Set the authentication for the cluster
      |
      kerberos
      -hive.metastore.uris|
      URI for client to contact metastore server
      |
      thrift://{metastoreHost}:{metastorePort}}
      -hive.metastore.sasl.enabled|
      If true, the metastore thrift interface will be secured with SASL.Clients must authenticate with Kerberos.
      |
      true
      -hive.metastore.kerberos.principal|
      The service principal for the metastore thrift server. The special string _HOST will be replaced automatically with the correct host name.
      |
      for example hive/_HOST@${realm}
      -hive.metastore.kerberos.keytab.file|
      The path to the Kerberos Keytab file containing the metastore thrift server's service principal.
      |
      for example /etc/security/keytabs/hive.service.keytab
      +| Key | Meaning | value | +|--------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------| +| hadoop.security.authentication |
      Set the authentication for the cluster
      |
      kerberos
      | +| hive.metastore.uris |
      URI for client to contact metastore server
      |
      thrift://{metastoreHost}:{metastorePort}}
      | +| hive.metastore.sasl.enabled |
      If true, the metastore thrift interface will be secured with SASL.Clients must authenticate with Kerberos.
      |
      true
      | +| hive.metastore.kerberos.principal |
      The service principal for the metastore thrift server. The special string _HOST will be replaced automatically with the correct host name.
      |
      for example hive/_HOST@${realm}
      | +| hive.metastore.kerberos.keytab.file |
      The path to the Kerberos Keytab file containing the metastore thrift server's service principal.
      |
      for example /etc/security/keytabs/hive.service.keytab
      | + diff --git a/docs/security/jdbc.md b/docs/security/jdbc.md index 0da6634f7d8..48c1d082f9d 100644 --- a/docs/security/jdbc.md +++ b/docs/security/jdbc.md @@ -1,20 +1,19 @@ - +- Licensed to the Apache Software Foundation (ASF) under one or more +- contributor license agreements. See the NOTICE file distributed with +- this work for additional information regarding copyright ownership. +- The ASF licenses this file to You under the Apache License, Version 2.0 +- (the "License"); you may not use this file except in compliance with +- the License. You may obtain a copy of the License at +- +- http://www.apache.org/licenses/LICENSE-2.0 +- +- Unless required by applicable law or agreed to in writing, software +- distributed under the License is distributed on an "AS IS" BASIS, +- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- See the License for the specific language governing permissions and +- limitations under the License. +--> # Configure Kyuubi to Use JDBC Authentication @@ -27,16 +26,17 @@ The SQL statement must start with the `SELECT` clause. Placeholders are supporte For example, `SELECT 1 FROM auth_db.auth_table WHERE user=${user} AND passwd=MD5(CONCAT(salt,${password}))` will be prepared as `SELECT 1 FROM auth_db.auth_table WHERE user=? AND passwd=MD5(CONCAT(salt,?))` with value replacement of `user` and `password` in string type. -## Enable JDBC Authentication +## Enable JDBC Authentication -To enable the jdbc authentication method, we need to +To enable the JDBC authentication method, we need to -- Put the jdbc driver jar file to `$KYUUBI_HOME/jars` directory to make it visible for +- Put the JDBC driver jar file to `$KYUUBI_HOME/jars` directory to make it visible for the classpath of the kyuubi server. - Configure the following properties to `$KYUUBI_HOME/conf/kyuubi-defaults.conf` on each node where kyuubi server is installed. ## Configure the authentication properties + Configure the following properties to `$KYUUBI_HOME/conf/kyuubi-defaults.conf` on each node where kyuubi server is installed. ```properties @@ -50,7 +50,7 @@ kyuubi.authentication.jdbc.query = SELECT 1 FROM auth_table WHERE user=${user} A ## Authentication with In-memory Database -Used with auto created in-memory database, JDBC authentication could be applied for token validation without starting up a dedicated database service or setting up a custom plugin. +Used with auto created in-memory database, JDBC authentication could be applied for token validation without starting up a dedicated database service or setting up a custom plugin. Consider authentication for a pair of a username and a token which contacted with an `expire_time` in 'yyyyMMddHHmm' format and a MD5 signature generated with sequence of `expire_time`, `username` and a secret key. With the following example, an H2 in-memory database will be auto crated with Kyuubi Server and used for authentication with its system function `HASH` and checking token expire time with `NOW()`. @@ -66,3 +66,4 @@ kyuubi.authentication.jdbc.query = SELECT 1 FROM ( \ ) WHERE signed = RAWTOHEX(HASH('MD5', CONCAT(secret_key, username, expire_time))) \ AND PARSEDATETIME(expire_time,'yyyyMMddHHmm') > NOW() ``` + diff --git a/docs/security/kerberos.rst b/docs/security/kerberos.rst index c4bca8e8219..2505fa30d8b 100644 --- a/docs/security/kerberos.rst +++ b/docs/security/kerberos.rst @@ -115,4 +115,5 @@ Refresh all the kyuubi server instances Restart all the kyuubi server instances or `Refresh Configurations`_ to activate the settings. .. _Hadoop Impersonation: https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-common/Superusers.html -.. _Refresh Configurations: ..tools/kyuubi-admin.html#refresh-config +.. _configurations: ../client/advanced/kerberos.html +.. _Refresh Configurations: ../tools/kyuubi-admin.html#refresh-config diff --git a/docs/security/kinit.md b/docs/security/kinit.md index d7089625872..0d613e0006e 100644 --- a/docs/security/kinit.md +++ b/docs/security/kinit.md @@ -1,23 +1,23 @@ +- Licensed to the Apache Software Foundation (ASF) under one or more +- contributor license agreements. See the NOTICE file distributed with +- this work for additional information regarding copyright ownership. +- The ASF licenses this file to You under the Apache License, Version 2.0 +- (the "License"); you may not use this file except in compliance with +- the License. You may obtain a copy of the License at +- +- http://www.apache.org/licenses/LICENSE-2.0 +- +- Unless required by applicable law or agreed to in writing, software +- distributed under the License is distributed on an "AS IS" BASIS, +- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- See the License for the specific language governing permissions and +- limitations under the License. +--> # Kinit Auxiliary Service -Kinit auxiliary service is a critical service both for authentication between Kyuubi client/server +Kinit auxiliary service is a critical service both for authentication between Kyuubi client/server and for authentication between Kyuubi server/Hadoop cluster in a Kerberos environment. It will get a Kerberos Ticket Cache from KDC and periodically re-kinit to keep the Ticket Cache fresh. @@ -69,17 +69,16 @@ They are valid for relatively short period. So, we always need to refresh it for ## Configurations -Key | Default | Meaning | Since ---- | --- | --- | --- -kyuubi.kinit.principal|
      <undefined>
      |
      Name of the Kerberos principal.
      |
      1.0.0
      -kyuubi.kinit.keytab|
      <undefined>
      |
      Location of Kyuubi server's keytab.
      |
      1.0.0
      -kyuubi.kinit.interval|
      PT1H
      |
      How often will Kyuubi server run `kinit -kt [keytab] [principal]` to renew the local Kerberos credentials cache
      |
      1.0.0
      -kyuubi.kinit.max.attempts|
      10
      |
      How many times will `kinit` process retry
      |
      1.0.0
      - -When working with a Kerberos-enabled Hadoop cluster, we should ensure that `hadoop.security.authentication` -is set to `KERBEROS` in `$HADOOP_CONF_DIR/core-site.xml` or `$KYUUBI_HOME/conf/kyuubi-defaults.conf`. -Then we need to specify `kyuubi.kinit.principal` and `kyuubi.kinit.keytab` for authentication. +| Key | Default | Meaning | Since | +|----------------------------------------|--------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------| +| kyuubi.kinit.principal |
      <undefined>
      |
      Name of the Kerberos principal.
      |
      1.0.0
      | +| kyuubi.kinit.keytab |
      <undefined>
      |
      Location of Kyuubi server's keytab.
      |
      1.0.0
      | +| kyuubi.kinit.interval |
      PT1H
      |
      How often will Kyuubi server run `kinit -kt [keytab] [principal]` to renew the local Kerberos credentials cache
      |
      1.0.0
      | +| kyuubi.kinit.max.attempts |
      10
      |
      How many times will `kinit` process retry
      |
      1.0.0
      | +When working with a Kerberos-enabled Hadoop cluster, we should ensure that `hadoop.security.authentication` +is set to `KERBEROS` in `$HADOOP_CONF_DIR/core-site.xml` or `$KYUUBI_HOME/conf/kyuubi-defaults.conf`. +Then we need to specify `kyuubi.kinit.principal` and `kyuubi.kinit.keytab` for authentication. For example, @@ -89,7 +88,7 @@ kyuubi.kinit.keytab=/path/to/kyuuib.keytab ``` **Note**: -`kyuubi.kinit.principal` must be in the format: `/@`, and `` must +`kyuubi.kinit.principal` must be in the format: `/@`, and `` must be a FQDN of the host Kyuubi is running. Kyuubi will use this `principal` to impersonate client users, @@ -101,7 +100,9 @@ For example, hadoop.proxyuser..groups * hadoop.proxyuser..hosts * ``` + ## Further Readings - [Hadoop in Secure Mode](https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/SecureMode.html) -- [Use Kerberos for authentication in Spark](http://spark.apache.org/docs/latest/security.html#kerberos) +- [Use Kerberos for authentication in Spark](https://spark.apache.org/docs/latest/security.html#kerberos) + diff --git a/docs/tools/kyuubi-admin.rst b/docs/tools/kyuubi-admin.rst index cf60f67b182..6063965938c 100644 --- a/docs/tools/kyuubi-admin.rst +++ b/docs/tools/kyuubi-admin.rst @@ -69,6 +69,10 @@ Usage: ``bin/kyuubi-admin refresh config [options] []`` - Description * - hadoopConf - The hadoop conf used for proxy user verification. + * - userDefaultsConf + - The user defaults configs with key in format in the form of `___{username}___.{config key}` from default property file. + * - unlimitedUsers + - The users without maximum connections limitation. .. _list_engine: diff --git a/docs/tools/kyuubi-ctl.md b/docs/tools/kyuubi-ctl.md deleted file mode 100644 index 34fb3a23200..00000000000 --- a/docs/tools/kyuubi-ctl.md +++ /dev/null @@ -1,162 +0,0 @@ - - - -# Managing kyuubi servers and engines Tool - -## Usage -```shell -bin/kyuubi-ctl --help -``` -Output -```shell -kyuubi 1.6.0-SNAPSHOT -Usage: kyuubi-ctl [create|get|delete|list] [options] - - -zk, --zk-quorum - The connection string for the zookeeper ensemble, using zk quorum manually. - -n, --namespace The namespace, using kyuubi-defaults/conf if absent. - -s, --host Hostname or IP address of a service. - -p, --port Listening port of a service. - -v, --version Using the compiled KYUUBI_VERSION default, change it if the active service is running in another. - -b, --verbose Print additional debug output. - -Command: create [server] - -Command: create server - Expose Kyuubi server instance to another domain. - -Command: get [server|engine] [options] - Get the service/engine node info, host and port needed. -Command: get server - Get Kyuubi server info of domain -Command: get engine - Get Kyuubi engine info belong to a user. - -u, --user The user name this engine belong to. - -et, --engine-type - The engine type this engine belong to. - -es, --engine-subdomain - The engine subdomain this engine belong to. - -esl, --engine-share-level - The engine share level this engine belong to. - -Command: delete [server|engine] [options] - Delete the specified service/engine node, host and port needed. -Command: delete server - Delete the specified service node for a domain -Command: delete engine - Delete the specified engine node for user. - -u, --user The user name this engine belong to. - -et, --engine-type - The engine type this engine belong to. - -es, --engine-subdomain - The engine subdomain this engine belong to. - -esl, --engine-share-level - The engine share level this engine belong to. - -Command: list [server|engine] [options] - List all the service/engine nodes for a particular domain. -Command: list server - List all the service nodes for a particular domain -Command: list engine - List all the engine nodes for a user - -u, --user The user name this engine belong to. - -et, --engine-type - The engine type this engine belong to. - -es, --engine-subdomain - The engine subdomain this engine belong to. - -esl, --engine-share-level - The engine share level this engine belong to. - - -h, --help Show help message and exit. -``` - -## Manage kyuubi servers -You can specify the zookeeper address(`--zk-quorum`) and namespace(`--namespace`), version(`--version`) parameters to query a specific kyuubi server cluster. - -### List server -List all the service nodes for a particular domain. -```shell -bin/kyuubi-ctl list server -``` - -### Create server -Expose Kyuubi server instance to another domain. - -First read `kyuubi.ha.zookeeper.namespace` in `conf/kyuubi-defaults.conf`, if there are server instances under this namespace, register them in the new namespace specified by the `--namespace` parameter. -```shell -bin/kyuubi-ctl create server --namespace XXX -``` - -### Get server -Get Kyuubi server info of domain. -```shell -bin/kyuubi-ctl get server --host XXX --port YYY -``` - -### Delete server -Delete the specified service node for a domain. - -After the server node is deleted, the kyuubi server stops opening new sessions and waits for all currently open sessions to be closed before the process exits. -```shell -bin/kyuubi-ctl delete server --host XXX --port YYY -``` - -## Manage kyuubi engines -You can also specify the engine type(`--engine-type`), engine share level subdomain(`--engine-subdomain`) and engine share level(`--engine-share-level`). - -If not specified, the configuration item `kyuubi.engine.type` of `kyuubi-defaults.conf` read, the default value is `SPARK_SQL`, `kyuubi.engine.share.level.subdomain`, the default value is `default`, `kyuubi.engine.share.level`, the default value is `USER`. - -If the engine pool mode is enabled through `kyuubi.engine.pool.size`, the subdomain consists of `kyuubi.engine.pool.name` and a number below size, e.g. `engine-pool-0` . - -`--engine-share-level` supports the following enum values. -* CONNECTION - - The engine Ref Id (UUID) must be specified via `--engine-subdomain`. -* USER: - - Default Value. -* GROUP: - - The `--user` parameter is the group name corresponding to the user. -* SERVER: - - The `--user` parameter is the user who started the kyuubi server. - -### List engine -List all the engine nodes for a user. -```shell -bin/kyuubi-ctl list engine --user AAA -``` -The management share level is SERVER, the user who starts the kyuubi server is A, the engine is TRINO, and the subdomain is adhoc. -```shell -bin/kyuubi-ctl list engine --user A --engine-type TRINO --engine-subdomain adhoc --engine-share-level SERVER -``` - -### Get engine -Get Kyuubi engine info belong to a user. -```shell -bin/kyuubi-ctl get engine --user AAA --host XXX --port YYY -``` - -### Delete engine -Delete the specified engine node for user. - -After the engine node is deleted, the kyuubi engine stops opening new sessions and waits for all currently open sessions to be closed before the process exits. -```shell -bin/kyuubi-ctl delete engine --user AAA --host XXX --port YYY -``` diff --git a/docs/tools/kyuubi-ctl.rst b/docs/tools/kyuubi-ctl.rst new file mode 100644 index 00000000000..4a9308fed0e --- /dev/null +++ b/docs/tools/kyuubi-ctl.rst @@ -0,0 +1,213 @@ +.. Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +Administrator CLI +================= + +.. _usage: + +Usage +----- +.. code-block:: bash + + bin/kyuubi-ctl --help + +Output + +.. parsed-literal:: + + kyuubi |release| + Usage: kyuubi-ctl [create|get|delete|list] [options] + + -zk, --zk-quorum + The connection string for the zookeeper ensemble, using zk quorum manually. + -n, --namespace The namespace, using kyuubi-defaults/conf if absent. + -s, --host Hostname or IP address of a service. + -p, --port Listening port of a service. + -v, --version Using the compiled KYUUBI_VERSION default, change it if the active service is running in another. + -b, --verbose Print additional debug output. + + Command: create [server] + + Command: create server + Expose Kyuubi server instance to another domain. + + Command: get [server|engine] [options] + Get the service/engine node info, host and port needed. + Command: get server + Get Kyuubi server info of domain + Command: get engine + Get Kyuubi engine info belong to a user. + -u, --user The user name this engine belong to. + -et, --engine-type + The engine type this engine belong to. + -es, --engine-subdomain + The engine subdomain this engine belong to. + -esl, --engine-share-level + The engine share level this engine belong to. + + Command: delete [server|engine] [options] + Delete the specified service/engine node, host and port needed. + Command: delete server + Delete the specified service node for a domain + Command: delete engine + Delete the specified engine node for user. + -u, --user The user name this engine belong to. + -et, --engine-type + The engine type this engine belong to. + -es, --engine-subdomain + The engine subdomain this engine belong to. + -esl, --engine-share-level + The engine share level this engine belong to. + + Command: list [server|engine] [options] + List all the service/engine nodes for a particular domain. + Command: list server + List all the service nodes for a particular domain + Command: list engine + List all the engine nodes for a user + -u, --user The user name this engine belong to. + -et, --engine-type + The engine type this engine belong to. + -es, --engine-subdomain + The engine subdomain this engine belong to. + -esl, --engine-share-level + The engine share level this engine belong to. + + -h, --help Show help message and exit. + +.. _manage_kyuubi_servers: + +Manage kyuubi servers +--------------------- + +You can specify the zookeeper address(``--zk-quorum``) and namespace(``--namespace``), version(``--version``) parameters to query a specific kyuubi server cluster. + +.. _list_servers: + +List server +*********** + +List all the service nodes for a particular domain. + +.. code-block:: bash + + bin/kyuubi-ctl list server + +.. _create_servers: + +Create server +*********** +Expose Kyuubi server instance to another domain. + +First read ``kyuubi.ha.zookeeper.namespace`` in ``conf/kyuubi-defaults.conf``, if there are server instances under this namespace, register them in the new namespace specified by the ``--namespace`` parameter. + +.. code-block:: bash + + bin/kyuubi-ctl create server --namespace XXX + +.. _get_servers: + +Get server +*********** + +Get Kyuubi server info of domain. + +.. code-block:: bash + + bin/kyuubi-ctl get server --host XXX --port YYY + +.. _delete_servers: + +Delete server +*********** + +Delete the specified service node for a domain. + +After the server node is deleted, the kyuubi server stops opening new sessions and waits for all currently open sessions to be closed before the process exits. + +.. code-block:: bash + + bin/kyuubi-ctl delete server --host XXX --port YYY + +.. _manage_kyuubi_engines: + +Manage kyuubi engines +--------------------- + +You can also specify the engine type(``--engine-type``), engine share level subdomain(``--engine-subdomain``) and engine share level(``--engine-share-level``). + +If not specified, the configuration item ``kyuubi.engine.type`` of ``kyuubi-defaults.conf`` read, the default value is ``SPARK_SQL``, ``kyuubi.engine.share.level.subdomain``, the default value is ``default``, ``kyuubi.engine.share.level``, the default value is ``USER``. + +If the engine pool mode is enabled through ``kyuubi.engine.pool.size``, the subdomain consists of ``kyuubi.engine.pool.name`` and a number below size, e.g. ``engine-pool-0`` . + +``--engine-share-level`` supports the following enum values. + +- CONNECTION + +The engine Ref Id (UUID) must be specified via ``--engine-subdomain``. + +- USER: + +Default Value. + +- GROUP: + +The ``--user`` parameter is the group name corresponding to the user. + +- SERVER: + +The ``--user`` parameter is the user who started the kyuubi server. + +.. _list_engines: + +List engine +*********** + +List all the engine nodes for a user. + +.. code-block:: bash + + bin/kyuubi-ctl list engine --user AAA + +The management share level is SERVER, the user who starts the kyuubi server is A, the engine is TRINO, and the subdomain is adhoc. + +.. code-block:: bash + + bin/kyuubi-ctl list engine --user A --engine-type TRINO --engine-subdomain adhoc --engine-share-level SERVER + +.. _get_engines: + +Get engine +*********** + +Get Kyuubi engine info belong to a user. + +.. code-block:: bash + + bin/kyuubi-ctl get engine --user AAA --host XXX --port YYY + +.. _delete_engines: + +Delete engine +************* + +Delete the specified engine node for user. + +After the engine node is deleted, the kyuubi engine stops opening new sessions and waits for all currently open sessions to be closed before the process exits. + +.. code-block:: bash + + bin/kyuubi-ctl delete engine --user AAA --host XXX --port YYY \ No newline at end of file diff --git a/docs/tools/spark_block_cleaner.md b/docs/tools/spark_block_cleaner.md index 94e87387577..4a1f20ff884 100644 --- a/docs/tools/spark_block_cleaner.md +++ b/docs/tools/spark_block_cleaner.md @@ -1,20 +1,19 @@ - +- Licensed to the Apache Software Foundation (ASF) under one or more +- contributor license agreements. See the NOTICE file distributed with +- this work for additional information regarding copyright ownership. +- The ASF licenses this file to You under the Apache License, Version 2.0 +- (the "License"); you may not use this file except in compliance with +- the License. You may obtain a copy of the License at +- +- http://www.apache.org/licenses/LICENSE-2.0 +- +- Unless required by applicable law or agreed to in writing, software +- distributed under the License is distributed on an "AS IS" BASIS, +- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- See the License for the specific language governing permissions and +- limitations under the License. +--> # Kubernetes Tools Spark Block Cleaner @@ -35,9 +34,9 @@ Therefore, we chose to use Spark Block Cleaner to clear the block files accumula ## Principle -When deploying Spark Block Cleaner, we will configure volumes for the destination folder. Spark Block Cleaner will perceive the folder by the parameter `CACHE_DIRS`. +When deploying Spark Block Cleaner, we will configure volumes for the destination folder. Spark Block Cleaner will perceive the folder by the parameter `CACHE_DIRS`. -Spark Block Cleaner will clear the perceived folder in a fixed loop(which can be configured by `SCHEDULE_INTERVAL`). And Spark Block Cleaner will select folder start with `blockmgr` and `spark` for deletion using the logic Spark uses to create those folders. +Spark Block Cleaner will clear the perceived folder in a fixed loop(which can be configured by `SCHEDULE_INTERVAL`). And Spark Block Cleaner will select folder start with `blockmgr` and `spark` for deletion using the logic Spark uses to create those folders. Before deleting those files, Spark Block Cleaner will determine whether it is a recently modified file(depending on whether the file has not been acted on within the specified time which configured by `FILE_EXPIRED_TIME`). Only delete files those beyond that time interval. @@ -50,6 +49,7 @@ Before you start using Spark Block Cleaner, you should build its docker images. ### Build Block Cleaner Docker Image In the `KYUUBI_HOME` directory, you can use the following cmd to build docker image. + ```shell docker build ./tools/spark-block-cleaner/kubernetes/docker ``` @@ -60,7 +60,8 @@ You need to modify the `${KYUUBI_HOME}/tools/spark-block-cleaner/kubernetes/spar In Kyuubi tools, we recommend using `DaemonSet` to start, and we offer default yaml file in daemonSet way. -Base file structure: +Base file structure: + ```yaml apiVersion kind @@ -80,6 +81,7 @@ spec ``` You can use affect the performance of Spark Block Cleaner through configure parameters in containers env part of `spark-block-cleaner.yml`. + ```yaml env: - name: CACHE_DIRS @@ -97,17 +99,20 @@ env: The most important thing, configure volumeMounts and volumes corresponding to Spark local-dirs. For example, Spark use /spark/shuffle1 as local-dir, you can configure like: + ```yaml volumes: - name: block-files-dir-1 hostPath: path: /spark/shuffle1 ``` + ```yaml volumeMounts: - name: block-files-dir-1 mountPath: /data/data1 ``` + ```yaml env: - name: CACHE_DIRS @@ -120,10 +125,11 @@ After you finishing modifying the above, you can use the following command `kube ## Related parameters -Name | Default | unit | Meaning ---- | --- | --- | --- -CACHE_DIRS | /data/data1,/data/data2| | The target dirs in container path which will clean block files. -FILE_EXPIRED_TIME | 604800 | seconds | Cleaner will clean the block files which current time - last modified time more than the fileExpiredTime. -DEEP_CLEAN_FILE_EXPIRED_TIME | 432000 | seconds | Deep clean will clean the block files which current time - last modified time more than the deepCleanFileExpiredTime. -FREE_SPACE_THRESHOLD | 60 | % | After first clean, if free Space low than threshold trigger deep clean. -SCHEDULE_INTERVAL | 3600 | seconds | Cleaner sleep between cleaning. +| Name | Default | unit | Meaning | +|------------------------------|-------------------------|---------|-----------------------------------------------------------------------------------------------------------------------| +| CACHE_DIRS | /data/data1,/data/data2 | | The target dirs in container path which will clean block files. | +| FILE_EXPIRED_TIME | 604800 | seconds | Cleaner will clean the block files which current time - last modified time more than the fileExpiredTime. | +| DEEP_CLEAN_FILE_EXPIRED_TIME | 432000 | seconds | Deep clean will clean the block files which current time - last modified time more than the deepCleanFileExpiredTime. | +| FREE_SPACE_THRESHOLD | 60 | % | After first clean, if free Space low than threshold trigger deep clean. | +| SCHEDULE_INTERVAL | 3600 | seconds | Cleaner sleep between cleaning. | + diff --git a/extensions/README.md b/extensions/README.md index 92eac9097d8..5725f0f9b08 100644 --- a/extensions/README.md +++ b/extensions/README.md @@ -1,25 +1,24 @@ - +- Licensed to the Apache Software Foundation (ASF) under one or more +- contributor license agreements. See the NOTICE file distributed with +- this work for additional information regarding copyright ownership. +- The ASF licenses this file to You under the Apache License, Version 2.0 +- (the "License"); you may not use this file except in compliance with +- the License. You may obtain a copy of the License at +- +- http://www.apache.org/licenses/LICENSE-2.0 +- +- Unless required by applicable law or agreed to in writing, software +- distributed under the License is distributed on an "AS IS" BASIS, +- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- See the License for the specific language governing permissions and +- limitations under the License. +--> + # For developers This folder contains plugins/extension for kyuubi server and different engine types. - - ext - kyuubi-server - spark @@ -27,4 +26,5 @@ This folder contains plugins/extension for kyuubi server and different engine ty - trino - hive - others - - ... \ No newline at end of file + - ... + diff --git a/extensions/server/kyuubi-server-plugin/pom.xml b/extensions/server/kyuubi-server-plugin/pom.xml index b7dfe0ae8dd..799f27c4632 100644 --- a/extensions/server/kyuubi-server-plugin/pom.xml +++ b/extensions/server/kyuubi-server-plugin/pom.xml @@ -21,7 +21,7 @@ org.apache.kyuubi kyuubi-parent - 1.7.0-SNAPSHOT + 1.8.0-SNAPSHOT ../../../pom.xml diff --git a/extensions/spark/kyuubi-extension-spark-3-1/pom.xml b/extensions/spark/kyuubi-extension-spark-3-1/pom.xml index 5bd4b2fd5d6..9f218f9d0fe 100644 --- a/extensions/spark/kyuubi-extension-spark-3-1/pom.xml +++ b/extensions/spark/kyuubi-extension-spark-3-1/pom.xml @@ -21,7 +21,7 @@ org.apache.kyuubi kyuubi-parent - 1.7.0-SNAPSHOT + 1.8.0-SNAPSHOT ../../../pom.xml diff --git a/extensions/spark/kyuubi-extension-spark-3-2/pom.xml b/extensions/spark/kyuubi-extension-spark-3-2/pom.xml index daab162b7b8..a80040aca65 100644 --- a/extensions/spark/kyuubi-extension-spark-3-2/pom.xml +++ b/extensions/spark/kyuubi-extension-spark-3-2/pom.xml @@ -21,7 +21,7 @@ org.apache.kyuubi kyuubi-parent - 1.7.0-SNAPSHOT + 1.8.0-SNAPSHOT ../../../pom.xml diff --git a/extensions/spark/kyuubi-extension-spark-3-3/pom.xml b/extensions/spark/kyuubi-extension-spark-3-3/pom.xml index cc82912133b..ca729a7819b 100644 --- a/extensions/spark/kyuubi-extension-spark-3-3/pom.xml +++ b/extensions/spark/kyuubi-extension-spark-3-3/pom.xml @@ -21,7 +21,7 @@ org.apache.kyuubi kyuubi-parent - 1.7.0-SNAPSHOT + 1.8.0-SNAPSHOT ../../../pom.xml diff --git a/extensions/spark/kyuubi-extension-spark-3-3/src/main/scala/org/apache/kyuubi/sql/KyuubiSparkSQLExtension.scala b/extensions/spark/kyuubi-extension-spark-3-3/src/main/scala/org/apache/kyuubi/sql/KyuubiSparkSQLExtension.scala index ef9da41be13..0db9b3ab88a 100644 --- a/extensions/spark/kyuubi-extension-spark-3-3/src/main/scala/org/apache/kyuubi/sql/KyuubiSparkSQLExtension.scala +++ b/extensions/spark/kyuubi-extension-spark-3-3/src/main/scala/org/apache/kyuubi/sql/KyuubiSparkSQLExtension.scala @@ -17,7 +17,7 @@ package org.apache.kyuubi.sql -import org.apache.spark.sql.SparkSessionExtensions +import org.apache.spark.sql.{FinalStageResourceManager, InjectCustomResourceProfile, SparkSessionExtensions} import org.apache.kyuubi.sql.watchdog.{ForcedMaxOutputRowsRule, MaxPartitionStrategy} @@ -39,5 +39,8 @@ class KyuubiSparkSQLExtension extends (SparkSessionExtensions => Unit) { // watchdog extension extensions.injectOptimizerRule(ForcedMaxOutputRowsRule) extensions.injectPlannerStrategy(MaxPartitionStrategy) + + extensions.injectQueryStagePrepRule(FinalStageResourceManager) + extensions.injectQueryStagePrepRule(InjectCustomResourceProfile) } } diff --git a/extensions/spark/kyuubi-extension-spark-3-3/src/main/scala/org/apache/spark/sql/FinalStageResourceManager.scala b/extensions/spark/kyuubi-extension-spark-3-3/src/main/scala/org/apache/spark/sql/FinalStageResourceManager.scala new file mode 100644 index 00000000000..2bf7ae6b75e --- /dev/null +++ b/extensions/spark/kyuubi-extension-spark-3-3/src/main/scala/org/apache/spark/sql/FinalStageResourceManager.scala @@ -0,0 +1,211 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql + +import scala.annotation.tailrec +import scala.collection.mutable +import scala.collection.mutable.ArrayBuffer + +import org.apache.spark.{ExecutorAllocationClient, MapOutputTrackerMaster, SparkContext, SparkEnv} +import org.apache.spark.scheduler.cluster.CoarseGrainedSchedulerBackend +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.execution.{FilterExec, ProjectExec, SortExec, SparkPlan} +import org.apache.spark.sql.execution.adaptive._ +import org.apache.spark.sql.execution.exchange.{ENSURE_REQUIREMENTS, ShuffleExchangeExec} + +import org.apache.kyuubi.sql.{KyuubiSQLConf, MarkNumOutputColumnsRule} + +/** + * This rule assumes the final write stage has less cores requirement than previous, otherwise + * this rule would take no effect. + * + * It provide a feature: + * 1. Kill redundant executors before running final write stage + */ +case class FinalStageResourceManager(session: SparkSession) + extends Rule[SparkPlan] with FinalRebalanceStageHelper { + override def apply(plan: SparkPlan): SparkPlan = { + if (!conf.getConf(KyuubiSQLConf.FINAL_WRITE_STAGE_EAGERLY_KILL_EXECUTORS_ENABLED)) { + return plan + } + + if (!MarkNumOutputColumnsRule.isWrite(session, plan)) { + return plan + } + + val sc = session.sparkContext + val dra = sc.getConf.getBoolean("spark.dynamicAllocation.enabled", false) + val coresPerExecutor = sc.getConf.getInt("spark.executor.cores", 1) + val minExecutors = sc.getConf.getInt("spark.dynamicAllocation.minExecutors", 0) + val maxExecutors = sc.getConf.getInt("spark.dynamicAllocation.maxExecutors", Int.MaxValue) + val factor = conf.getConf(KyuubiSQLConf.FINAL_WRITE_STAGE_PARTITION_FACTOR) + val hasImprovementRoom = maxExecutors - 1 > minExecutors * factor + // Fast fail if: + // 1. DRA off + // 2. only work with yarn and k8s + // 3. maxExecutors is not bigger than minExecutors * factor + if (!dra || !sc.schedulerBackend.isInstanceOf[CoarseGrainedSchedulerBackend] || + !hasImprovementRoom) { + return plan + } + + val stageOpt = findFinalRebalanceStage(plan) + if (stageOpt.isEmpty) { + return plan + } + + // Since we are in `prepareQueryStage`, the AQE shuffle read has not been applied. + // So we need to apply it by self. + val shuffleRead = queryStageOptimizerRules.foldLeft(stageOpt.get.asInstanceOf[SparkPlan]) { + case (latest, rule) => rule.apply(latest) + } + val (targetCores, stage) = shuffleRead match { + case AQEShuffleReadExec(stage: ShuffleQueryStageExec, partitionSpecs) => + (partitionSpecs.length, stage) + case stage: ShuffleQueryStageExec => + // we can still kill executors if no AQE shuffle read, e.g., `.repartition(2)` + (stage.shuffle.numPartitions, stage) + case _ => + // it should never happen in current Spark, but to be safe do nothing if happens + logWarning("BUG, Please report to Apache Kyuubi community") + return plan + } + // The condition whether inject custom resource profile: + // - target executors < active executors + // - active executors - target executors > min executors + val numActiveExecutors = sc.getExecutorIds().length + val targetExecutors = (math.ceil(targetCores.toFloat / coresPerExecutor) * factor).toInt + .max(1) + val hasBenefits = targetExecutors < numActiveExecutors && + (numActiveExecutors - targetExecutors) > minExecutors + logInfo(s"The snapshot of current executors view, " + + s"active executors: $numActiveExecutors, min executor: $minExecutors, " + + s"target executors: $targetExecutors, has benefits: $hasBenefits") + if (hasBenefits) { + val shuffleId = stage.plan.asInstanceOf[ShuffleExchangeExec].shuffleDependency.shuffleId + val numReduce = stage.plan.asInstanceOf[ShuffleExchangeExec].numPartitions + // Now, there is only a final rebalance stage waiting to execute and all tasks of previous + // stage are finished. Kill redundant existed executors eagerly so the tasks of final + // stage can be centralized scheduled. + killExecutors(sc, targetExecutors, shuffleId, numReduce) + } + + plan + } + + /** + * The priority of kill executors follow: + * 1. kill executor who is younger than other (The older the JIT works better) + * 2. kill executor who produces less shuffle data first + */ + private def findExecutorToKill( + sc: SparkContext, + targetExecutors: Int, + shuffleId: Int, + numReduce: Int): Seq[String] = { + val tracker = SparkEnv.get.mapOutputTracker.asInstanceOf[MapOutputTrackerMaster] + val shuffleStatus = tracker.shuffleStatuses(shuffleId) + val executorToBlockSize = new mutable.HashMap[String, Long] + shuffleStatus.withMapStatuses { mapStatus => + mapStatus.foreach { status => + var i = 0 + var sum = 0L + while (i < numReduce) { + sum += status.getSizeForBlock(i) + i += 1 + } + executorToBlockSize.getOrElseUpdate(status.location.executorId, sum) + } + } + + val backend = sc.schedulerBackend.asInstanceOf[CoarseGrainedSchedulerBackend] + val executorsWithRegistrationTs = backend.getExecutorsWithRegistrationTs() + val existedExecutors = executorsWithRegistrationTs.keys.toSet + val expectedNumExecutorToKill = existedExecutors.size - targetExecutors + if (expectedNumExecutorToKill < 1) { + return Seq.empty + } + + val executorIdsToKill = new ArrayBuffer[String]() + // We first kill executor who does not hold shuffle block. It would happen because + // the last stage is running fast and finished in a short time. The existed executors are + // from previous stages that have not been killed by DRA, so we can not find it by tracking + // shuffle status. + // We should evict executors by their alive time first and retain all of executors which + // have better locality for shuffle block. + executorsWithRegistrationTs.toSeq.sortBy(_._2).foreach { case (id, _) => + if (executorIdsToKill.length < expectedNumExecutorToKill && + !executorToBlockSize.contains(id)) { + executorIdsToKill.append(id) + } + } + + // Evict the rest executors according to the shuffle block size + executorToBlockSize.toSeq.sortBy(_._2).foreach { case (id, _) => + if (executorIdsToKill.length < expectedNumExecutorToKill) { + executorIdsToKill.append(id) + } + } + + executorIdsToKill.toSeq + } + + private def killExecutors( + sc: SparkContext, + targetExecutors: Int, + shuffleId: Int, + numReduce: Int): Unit = { + val executorAllocationClient = sc.schedulerBackend.asInstanceOf[ExecutorAllocationClient] + + val executorsToKill = findExecutorToKill(sc, targetExecutors, shuffleId, numReduce) + logInfo(s"Request to kill executors, total count ${executorsToKill.size}, " + + s"[${executorsToKill.mkString(", ")}].") + + // Note, `SparkContext#killExecutors` does not allow with DRA enabled, + // see `https://github.com/apache/spark/pull/20604`. + // It may cause the status in `ExecutorAllocationManager` inconsistent with + // `CoarseGrainedSchedulerBackend` for a while. But it should be synchronous finally. + executorAllocationClient.killExecutors( + executorIds = executorsToKill, + adjustTargetNumExecutors = false, + countFailures = false, + force = false) + } + + @transient private val queryStageOptimizerRules: Seq[Rule[SparkPlan]] = Seq( + OptimizeSkewInRebalancePartitions, + CoalesceShufflePartitions(session), + OptimizeShuffleWithLocalRead) +} + +trait FinalRebalanceStageHelper { + @tailrec + final protected def findFinalRebalanceStage(plan: SparkPlan): Option[ShuffleQueryStageExec] = { + plan match { + case p: ProjectExec => findFinalRebalanceStage(p.child) + case f: FilterExec => findFinalRebalanceStage(f.child) + case s: SortExec if !s.global => findFinalRebalanceStage(s.child) + case stage: ShuffleQueryStageExec + if stage.isMaterialized && + stage.plan.isInstanceOf[ShuffleExchangeExec] && + stage.plan.asInstanceOf[ShuffleExchangeExec].shuffleOrigin != ENSURE_REQUIREMENTS => + Some(stage) + case _ => None + } + } +} diff --git a/extensions/spark/kyuubi-extension-spark-3-3/src/main/scala/org/apache/spark/sql/InjectCustomResourceProfile.scala b/extensions/spark/kyuubi-extension-spark-3-3/src/main/scala/org/apache/spark/sql/InjectCustomResourceProfile.scala new file mode 100644 index 00000000000..30c042b2a2c --- /dev/null +++ b/extensions/spark/kyuubi-extension-spark-3-3/src/main/scala/org/apache/spark/sql/InjectCustomResourceProfile.scala @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql + +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.execution.{CustomResourceProfileExec, SparkPlan} +import org.apache.spark.sql.execution.adaptive._ + +import org.apache.kyuubi.sql.{KyuubiSQLConf, MarkNumOutputColumnsRule} + +/** + * Inject custom resource profile for final write stage, so we can specify custom + * executor resource configs. + */ +case class InjectCustomResourceProfile(session: SparkSession) + extends Rule[SparkPlan] with FinalRebalanceStageHelper { + override def apply(plan: SparkPlan): SparkPlan = { + if (!conf.getConf(KyuubiSQLConf.FINAL_WRITE_STAGE_RESOURCE_ISOLATION_ENABLED)) { + return plan + } + + if (!MarkNumOutputColumnsRule.isWrite(session, plan)) { + return plan + } + + val stage = findFinalRebalanceStage(plan) + if (stage.isEmpty) { + return plan + } + + // TODO: Ideally, We can call `CoarseGrainedSchedulerBackend.requestTotalExecutors` eagerly + // to reduce the task submit pending time, but it may lose task locality. + // + // By default, it would request executors when catch stage submit event. + injectCustomResourceProfile(plan, stage.get.id) + } + + private def injectCustomResourceProfile(plan: SparkPlan, id: Int): SparkPlan = { + plan match { + case stage: ShuffleQueryStageExec if stage.id == id => + CustomResourceProfileExec(stage) + case _ => plan.mapChildren(child => injectCustomResourceProfile(child, id)) + } + } +} diff --git a/extensions/spark/kyuubi-extension-spark-3-3/src/main/scala/org/apache/spark/sql/execution/CustomResourceProfileExec.scala b/extensions/spark/kyuubi-extension-spark-3-3/src/main/scala/org/apache/spark/sql/execution/CustomResourceProfileExec.scala new file mode 100644 index 00000000000..3698140fbd0 --- /dev/null +++ b/extensions/spark/kyuubi-extension-spark-3-3/src/main/scala/org/apache/spark/sql/execution/CustomResourceProfileExec.scala @@ -0,0 +1,112 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution + +import org.apache.spark.network.util.{ByteUnit, JavaUtils} +import org.apache.spark.rdd.RDD +import org.apache.spark.resource.{ExecutorResourceRequests, ResourceProfileBuilder} +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.{Attribute, SortOrder} +import org.apache.spark.sql.catalyst.plans.physical.Partitioning +import org.apache.spark.sql.execution.metric.{SQLMetric, SQLMetrics} +import org.apache.spark.sql.vectorized.ColumnarBatch +import org.apache.spark.util.Utils + +import org.apache.kyuubi.sql.KyuubiSQLConf._ + +/** + * This node wraps the final executed plan and inject custom resource profile to the RDD. + * It assumes that, the produced RDD would create the `ResultStage` in `DAGScheduler`, + * so it makes resource isolation between previous and final stage. + * + * Note that, Spark does not support config `minExecutors` for each resource profile. + * Which means, it would retain `minExecutors` for each resource profile. + * So, suggest set `spark.dynamicAllocation.minExecutors` to 0 if enable this feature. + */ +case class CustomResourceProfileExec(child: SparkPlan) extends UnaryExecNode { + override def output: Seq[Attribute] = child.output + override def outputPartitioning: Partitioning = child.outputPartitioning + override def outputOrdering: Seq[SortOrder] = child.outputOrdering + override def supportsColumnar: Boolean = child.supportsColumnar + override def supportsRowBased: Boolean = child.supportsRowBased + override protected def doCanonicalize(): SparkPlan = child.canonicalized + + private val executorCores = conf.getConf(FINAL_WRITE_STAGE_EXECUTOR_CORES).getOrElse( + sparkContext.getConf.getInt("spark.executor.cores", 1)) + private val executorMemory = conf.getConf(FINAL_WRITE_STAGE_EXECUTOR_MEMORY).getOrElse( + sparkContext.getConf.get("spark.executor.memory", "2G")) + private val executorMemoryOverhead = + conf.getConf(FINAL_WRITE_STAGE_EXECUTOR_MEMORY_OVERHEAD) + .getOrElse(sparkContext.getConf.get("spark.executor.memoryOverhead", "1G")) + private val executorOffHeapMemory = conf.getConf(FINAL_WRITE_STAGE_EXECUTOR_OFF_HEAP_MEMORY) + + override lazy val metrics: Map[String, SQLMetric] = { + val base = Map( + "executorCores" -> SQLMetrics.createMetric(sparkContext, "executor cores"), + "executorMemory" -> SQLMetrics.createMetric(sparkContext, "executor memory (MiB)"), + "executorMemoryOverhead" -> SQLMetrics.createMetric( + sparkContext, + "executor memory overhead (MiB)")) + val addition = executorOffHeapMemory.map(_ => + "executorOffHeapMemory" -> + SQLMetrics.createMetric(sparkContext, "executor off heap memory (MiB)")).toMap + base ++ addition + } + + private def wrapResourceProfile[T](rdd: RDD[T]): RDD[T] = { + if (Utils.isTesting) { + // do nothing for local testing + return rdd + } + + metrics("executorCores") += executorCores + metrics("executorMemory") += JavaUtils.byteStringAs(executorMemory, ByteUnit.MiB) + metrics("executorMemoryOverhead") += JavaUtils.byteStringAs( + executorMemoryOverhead, + ByteUnit.MiB) + executorOffHeapMemory.foreach(m => + metrics("executorOffHeapMemory") += JavaUtils.byteStringAs(m, ByteUnit.MiB)) + + val executionId = sparkContext.getLocalProperty(SQLExecution.EXECUTION_ID_KEY) + SQLMetrics.postDriverMetricUpdates(sparkContext, executionId, metrics.values.toSeq) + + val resourceProfileBuilder = new ResourceProfileBuilder() + val executorResourceRequests = new ExecutorResourceRequests() + executorResourceRequests.cores(executorCores) + executorResourceRequests.memory(executorMemory) + executorResourceRequests.memoryOverhead(executorMemoryOverhead) + executorOffHeapMemory.foreach(executorResourceRequests.offHeapMemory) + resourceProfileBuilder.require(executorResourceRequests) + rdd.withResources(resourceProfileBuilder.build()) + rdd + } + + override protected def doExecute(): RDD[InternalRow] = { + val rdd = child.execute() + wrapResourceProfile(rdd) + } + + override protected def doExecuteColumnar(): RDD[ColumnarBatch] = { + val rdd = child.executeColumnar() + wrapResourceProfile(rdd) + } + + override protected def withNewChildInternal(newChild: SparkPlan): SparkPlan = { + this.copy(child = newChild) + } +} diff --git a/extensions/spark/kyuubi-extension-spark-3-3/src/test/scala/org/apache/spark/sql/InjectResourceProfileSuite.scala b/extensions/spark/kyuubi-extension-spark-3-3/src/test/scala/org/apache/spark/sql/InjectResourceProfileSuite.scala new file mode 100644 index 00000000000..b0767b18708 --- /dev/null +++ b/extensions/spark/kyuubi-extension-spark-3-3/src/test/scala/org/apache/spark/sql/InjectResourceProfileSuite.scala @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql + +import org.apache.spark.scheduler.{SparkListener, SparkListenerEvent} +import org.apache.spark.sql.execution.ui.SparkListenerSQLAdaptiveExecutionUpdate + +import org.apache.kyuubi.sql.KyuubiSQLConf + +class InjectResourceProfileSuite extends KyuubiSparkSQLExtensionTest { + private def checkCustomResourceProfile(sqlString: String, exists: Boolean): Unit = { + @volatile var lastEvent: SparkListenerSQLAdaptiveExecutionUpdate = null + val listener = new SparkListener { + override def onOtherEvent(event: SparkListenerEvent): Unit = { + event match { + case e: SparkListenerSQLAdaptiveExecutionUpdate => lastEvent = e + case _ => + } + } + } + + spark.sparkContext.addSparkListener(listener) + try { + sql(sqlString).collect() + spark.sparkContext.listenerBus.waitUntilEmpty() + assert(lastEvent != null) + var current = lastEvent.sparkPlanInfo + var shouldStop = false + while (!shouldStop) { + if (current.nodeName != "CustomResourceProfile") { + if (current.children.isEmpty) { + assert(!exists) + shouldStop = true + } else { + current = current.children.head + } + } else { + assert(exists) + shouldStop = true + } + } + } finally { + spark.sparkContext.removeSparkListener(listener) + } + } + + test("Inject resource profile") { + withTable("t") { + withSQLConf( + "spark.sql.adaptive.forceApply" -> "true", + KyuubiSQLConf.FINAL_STAGE_CONFIG_ISOLATION.key -> "true", + KyuubiSQLConf.FINAL_WRITE_STAGE_RESOURCE_ISOLATION_ENABLED.key -> "true") { + + sql("CREATE TABLE t (c1 int, c2 string) USING PARQUET") + + checkCustomResourceProfile("INSERT INTO TABLE t VALUES(1, 'a')", false) + checkCustomResourceProfile("SELECT 1", false) + checkCustomResourceProfile( + "INSERT INTO TABLE t SELECT /*+ rebalance */ * FROM VALUES(1, 'a')", + true) + } + } + } +} diff --git a/extensions/spark/kyuubi-extension-spark-common/pom.xml b/extensions/spark/kyuubi-extension-spark-common/pom.xml index 2c587fd788a..6d4bd144369 100644 --- a/extensions/spark/kyuubi-extension-spark-common/pom.xml +++ b/extensions/spark/kyuubi-extension-spark-common/pom.xml @@ -21,7 +21,7 @@ org.apache.kyuubi kyuubi-parent - 1.7.0-SNAPSHOT + 1.8.0-SNAPSHOT ../../../pom.xml diff --git a/extensions/spark/kyuubi-extension-spark-common/src/main/scala/org/apache/kyuubi/sql/KyuubiQueryStagePreparation.scala b/extensions/spark/kyuubi-extension-spark-common/src/main/scala/org/apache/kyuubi/sql/KyuubiQueryStagePreparation.scala index 360a2645e50..fee65b35082 100644 --- a/extensions/spark/kyuubi-extension-spark-common/src/main/scala/org/apache/kyuubi/sql/KyuubiQueryStagePreparation.scala +++ b/extensions/spark/kyuubi-extension-spark-common/src/main/scala/org/apache/kyuubi/sql/KyuubiQueryStagePreparation.scala @@ -133,7 +133,9 @@ case class FinalStageConfigIsolation(session: SparkSession) extends Rule[SparkPl reusedExchangeExec // query stage is leaf node so we need to transform it manually - case queryStage: QueryStageExec => + // compatible with Spark 3.5: + // SPARK-42101: table cache is a independent query stage, so do not need include it. + case queryStage: QueryStageExec if queryStage.nodeName != "TableCacheQueryStage" => queryStageNum += 1 collectNumber(queryStage.plan) queryStage diff --git a/extensions/spark/kyuubi-extension-spark-common/src/main/scala/org/apache/kyuubi/sql/KyuubiSQLConf.scala b/extensions/spark/kyuubi-extension-spark-common/src/main/scala/org/apache/kyuubi/sql/KyuubiSQLConf.scala index 0fe9f649eaa..4df924b519f 100644 --- a/extensions/spark/kyuubi-extension-spark-common/src/main/scala/org/apache/kyuubi/sql/KyuubiSQLConf.scala +++ b/extensions/spark/kyuubi-extension-spark-common/src/main/scala/org/apache/kyuubi/sql/KyuubiSQLConf.scala @@ -190,4 +190,60 @@ object KyuubiSQLConf { .version("1.7.0") .booleanConf .createWithDefault(true) + + val FINAL_WRITE_STAGE_EAGERLY_KILL_EXECUTORS_ENABLED = + buildConf("spark.sql.finalWriteStage.eagerlyKillExecutors.enabled") + .doc("When true, eagerly kill redundant executors before running final write stage.") + .version("1.8.0") + .booleanConf + .createWithDefault(false) + + val FINAL_WRITE_STAGE_PARTITION_FACTOR = + buildConf("spark.sql.finalWriteStage.retainExecutorsFactor") + .doc("If the target executors * factor < active executors, and " + + "target executors * factor > min executors, then kill redundant executors.") + .version("1.8.0") + .doubleConf + .checkValue(_ >= 1, "must be bigger than or equal to 1") + .createWithDefault(1.2) + + val FINAL_WRITE_STAGE_RESOURCE_ISOLATION_ENABLED = + buildConf("spark.sql.finalWriteStage.resourceIsolation.enabled") + .doc( + "When true, make final write stage resource isolation using custom RDD resource profile.") + .version("1.8.0") + .booleanConf + .createWithDefault(false) + + val FINAL_WRITE_STAGE_EXECUTOR_CORES = + buildConf("spark.sql.finalWriteStage.executorCores") + .doc("Specify the executor core request for final write stage. " + + "It would be passed to the RDD resource profile.") + .version("1.8.0") + .intConf + .createOptional + + val FINAL_WRITE_STAGE_EXECUTOR_MEMORY = + buildConf("spark.sql.finalWriteStage.executorMemory") + .doc("Specify the executor on heap memory request for final write stage. " + + "It would be passed to the RDD resource profile.") + .version("1.8.0") + .stringConf + .createOptional + + val FINAL_WRITE_STAGE_EXECUTOR_MEMORY_OVERHEAD = + buildConf("spark.sql.finalWriteStage.executorMemoryOverhead") + .doc("Specify the executor memory overhead request for final write stage. " + + "It would be passed to the RDD resource profile.") + .version("1.8.0") + .stringConf + .createOptional + + val FINAL_WRITE_STAGE_EXECUTOR_OFF_HEAP_MEMORY = + buildConf("spark.sql.finalWriteStage.executorOffHeapMemory") + .doc("Specify the executor off heap memory request for final write stage. " + + "It would be passed to the RDD resource profile.") + .version("1.8.0") + .stringConf + .createOptional } diff --git a/extensions/spark/kyuubi-extension-spark-common/src/test/scala/org/apache/spark/sql/KyuubiSparkSQLExtensionTest.scala b/extensions/spark/kyuubi-extension-spark-common/src/test/scala/org/apache/spark/sql/KyuubiSparkSQLExtensionTest.scala index fd81948c61a..e58ac726c13 100644 --- a/extensions/spark/kyuubi-extension-spark-common/src/test/scala/org/apache/spark/sql/KyuubiSparkSQLExtensionTest.scala +++ b/extensions/spark/kyuubi-extension-spark-common/src/test/scala/org/apache/spark/sql/KyuubiSparkSQLExtensionTest.scala @@ -29,6 +29,8 @@ import org.apache.kyuubi.sql.KyuubiSQLConf trait KyuubiSparkSQLExtensionTest extends QueryTest with SQLTestUtils with AdaptiveSparkPlanHelper { + sys.props.put("spark.testing", "1") + private var _spark: Option[SparkSession] = None protected def spark: SparkSession = _spark.getOrElse { throw new RuntimeException("test spark session don't initial before using it.") diff --git a/extensions/spark/kyuubi-extension-spark-jdbc-dialect/pom.xml b/extensions/spark/kyuubi-extension-spark-jdbc-dialect/pom.xml index 5588805e9f5..48c4c437923 100644 --- a/extensions/spark/kyuubi-extension-spark-jdbc-dialect/pom.xml +++ b/extensions/spark/kyuubi-extension-spark-jdbc-dialect/pom.xml @@ -21,7 +21,7 @@ org.apache.kyuubi kyuubi-parent - 1.7.0-SNAPSHOT + 1.8.0-SNAPSHOT ../../../pom.xml diff --git a/extensions/spark/kyuubi-spark-authz/README.md b/extensions/spark/kyuubi-spark-authz/README.md index c257e30e143..554797ee01d 100644 --- a/extensions/spark/kyuubi-spark-authz/README.md +++ b/extensions/spark/kyuubi-spark-authz/README.md @@ -1,19 +1,19 @@ +- Licensed to the Apache Software Foundation (ASF) under one or more +- contributor license agreements. See the NOTICE file distributed with +- this work for additional information regarding copyright ownership. +- The ASF licenses this file to You under the Apache License, Version 2.0 +- (the "License"); you may not use this file except in compliance with +- the License. You may obtain a copy of the License at +- +- http://www.apache.org/licenses/LICENSE-2.0 +- +- Unless required by applicable law or agreed to in writing, software +- distributed under the License is distributed on an "AS IS" BASIS, +- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- See the License for the specific language governing permissions and +- limitations under the License. +--> # Kyuubi Spark AuthZ Extension @@ -29,7 +29,6 @@ build/mvn clean package -pl :kyuubi-spark-authz_2.12 -Dspark.version=3.2.1 -Dranger.version=2.3.0 ``` - ### Supported Apache Spark Versions `-Dspark.version=` @@ -54,3 +53,4 @@ build/mvn clean package -pl :kyuubi-spark-authz_2.12 -Dspark.version=3.2.1 -Dran - [x] 1.0.x - [x] 0.7.x - [x] 0.6.x + diff --git a/extensions/spark/kyuubi-spark-authz/pom.xml b/extensions/spark/kyuubi-spark-authz/pom.xml index 532564183a1..8df1b9465a9 100644 --- a/extensions/spark/kyuubi-spark-authz/pom.xml +++ b/extensions/spark/kyuubi-spark-authz/pom.xml @@ -21,7 +21,7 @@ org.apache.kyuubi kyuubi-parent - 1.7.0-SNAPSHOT + 1.8.0-SNAPSHOT ../../../pom.xml @@ -32,7 +32,9 @@ https://kyuubi.apache.org/ + 1.0.0 + 1.19.4 5.7.0 @@ -42,6 +44,10 @@ ranger-plugins-common ${ranger.version} + + com.sun.jersey + jersey-bundle + org.apache.ranger ranger-plugin-classloader @@ -101,6 +107,18 @@ + + com.sun.jersey + jersey-client + ${jersey.client.version} + + + javax.ws.rs + jsr311-api + + + + com.kstruct gethostname4j @@ -283,12 +301,6 @@ test - - com.google.code.gson - gson - test - - com.google.guava guava diff --git a/extensions/spark/kyuubi-spark-authz/src/main/resources/table_command_spec.json b/extensions/spark/kyuubi-spark-authz/src/main/resources/table_command_spec.json index 3b9b8f24e6b..f1c2297b38e 100644 --- a/extensions/spark/kyuubi-spark-authz/src/main/resources/table_command_spec.json +++ b/extensions/spark/kyuubi-spark-authz/src/main/resources/table_command_spec.json @@ -219,6 +219,20 @@ "fieldName" : "query", "fieldExtractor" : "LogicalPlanQueryExtractor" } ] +}, { + "classname" : "org.apache.spark.sql.catalyst.plans.logical.DescribeRelation", + "tableDescs" : [ { + "fieldName" : "relation", + "fieldExtractor" : "ResolvedTableTableExtractor", + "columnDesc" : null, + "actionTypeDesc" : null, + "tableTypeDesc" : null, + "catalogDesc" : null, + "isInput" : true, + "setCurrentDatabaseIfMissing" : true + } ], + "opType" : "DESCTABLE", + "queryDescs" : [ ] }, { "classname" : "org.apache.spark.sql.catalyst.plans.logical.DropColumns", "tableDescs" : [ { @@ -677,23 +691,6 @@ "fieldName" : "oldName", "fieldExtractor" : "TableIdentifierTableExtractor", "columnDesc" : null, - "actionTypeDesc" : { - "fieldName" : null, - "fieldExtractor" : null, - "actionType" : "DELETE" - }, - "tableTypeDesc" : { - "fieldName" : "oldName", - "fieldExtractor" : "TableIdentifierTableTypeExtractor", - "skipTypes" : [ "TEMP_VIEW" ] - }, - "catalogDesc" : null, - "isInput" : false, - "setCurrentDatabaseIfMissing" : false - }, { - "fieldName" : "newName", - "fieldExtractor" : "TableIdentifierTableExtractor", - "columnDesc" : null, "actionTypeDesc" : null, "tableTypeDesc" : { "fieldName" : "oldName", @@ -1179,6 +1176,23 @@ } ], "opType" : "TRUNCATETABLE", "queryDescs" : [ ] +}, { + "classname" : "org.apache.spark.sql.execution.datasources.CreateTable", + "tableDescs" : [ { + "fieldName" : "tableDesc", + "fieldExtractor" : "CatalogTableTableExtractor", + "columnDesc" : null, + "actionTypeDesc" : null, + "tableTypeDesc" : null, + "catalogDesc" : null, + "isInput" : false, + "setCurrentDatabaseIfMissing" : false + } ], + "opType" : "CREATETABLE", + "queryDescs" : [ { + "fieldName" : "query", + "fieldExtractor" : "LogicalPlanOptionQueryExtractor" + } ] }, { "classname" : "org.apache.spark.sql.execution.datasources.CreateTempViewUsing", "tableDescs" : [ ], diff --git a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/PrivilegesBuilder.scala b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/PrivilegesBuilder.scala index cee79b87d7c..b8220ea2732 100644 --- a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/PrivilegesBuilder.scala +++ b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/PrivilegesBuilder.scala @@ -95,6 +95,12 @@ object PrivilegesBuilder { val cols = conditionList ++ sortCols buildQuery(s.child, privilegeObjects, projectionList, cols, spark) + case a: Aggregate => + val aggCols = + (a.aggregateExpressions ++ a.groupingExpressions).flatMap(e => collectLeaves(e)) + val cols = conditionList ++ aggCols + buildQuery(a.child, privilegeObjects, projectionList, cols, spark) + case scan if isKnownScan(scan) && scan.resolved => getScanSpec(scan).tables(scan, spark).foreach(mergeProjection(_, scan)) @@ -144,7 +150,7 @@ object PrivilegesBuilder { } } catch { case e: Exception => - LOG.warn(tableDesc.error(plan, e)) + LOG.debug(tableDesc.error(plan, e)) Nil } } @@ -162,7 +168,7 @@ object PrivilegesBuilder { } } catch { case e: Exception => - LOG.warn(databaseDesc.error(plan, e)) + LOG.debug(databaseDesc.error(plan, e)) } } desc.operationType @@ -193,7 +199,7 @@ object PrivilegesBuilder { } } catch { case e: Exception => - LOG.warn(fd.error(plan, e)) + LOG.debug(fd.error(plan, e)) } } spec.operationType diff --git a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/AccessType.scala b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/AccessType.scala index 52e3c01768f..7d62229ee41 100644 --- a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/AccessType.scala +++ b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/AccessType.scala @@ -35,14 +35,13 @@ object AccessType extends Enumeration { case CREATETABLE | CREATEVIEW | CREATETABLE_AS_SELECT if obj.privilegeObjectType == TABLE_OR_VIEW => if (isInput) SELECT else CREATE - // new table new `CREATE` privilege here and the old table gets `DELETE` via actionType - case ALTERTABLE_RENAME => CREATE case ALTERDATABASE | ALTERDATABASE_LOCATION | ALTERTABLE_ADDCOLS | ALTERTABLE_ADDPARTS | ALTERTABLE_DROPPARTS | ALTERTABLE_LOCATION | + ALTERTABLE_RENAME | ALTERTABLE_PROPERTIES | ALTERTABLE_RENAMECOL | ALTERTABLE_RENAMEPART | diff --git a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/FilterDataSourceV2Strategy.scala b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/FilterDataSourceV2Strategy.scala index 1109464ac0a..d39aacdcf91 100644 --- a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/FilterDataSourceV2Strategy.scala +++ b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/FilterDataSourceV2Strategy.scala @@ -25,10 +25,13 @@ import org.apache.kyuubi.plugin.spark.authz.util.ObjectFilterPlaceHolder class FilterDataSourceV2Strategy(spark: SparkSession) extends Strategy { override def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match { case ObjectFilterPlaceHolder(child) if child.nodeName == "ShowNamespaces" => - spark.sessionState.planner.plan(child).map(FilteredShowNamespaceExec).toSeq + spark.sessionState.planner.plan(child) + .map(FilteredShowNamespaceExec(_, spark.sparkContext)).toSeq case ObjectFilterPlaceHolder(child) if child.nodeName == "ShowTables" => - spark.sessionState.planner.plan(child).map(FilteredShowTablesExec).toSeq + spark.sessionState.planner.plan(child) + .map(FilteredShowTablesExec(_, spark.sparkContext)).toSeq + case _ => Nil } } diff --git a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/FilteredShowObjectsExec.scala b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/FilteredShowObjectsExec.scala index 7cc777d9b89..67519118ecc 100644 --- a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/FilteredShowObjectsExec.scala +++ b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/FilteredShowObjectsExec.scala @@ -17,6 +17,7 @@ package org.apache.kyuubi.plugin.spark.authz.ranger import org.apache.hadoop.security.UserGroupInformation +import org.apache.spark.SparkContext import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.Attribute @@ -26,24 +27,29 @@ import org.apache.kyuubi.plugin.spark.authz.{ObjectType, OperationType} import org.apache.kyuubi.plugin.spark.authz.util.AuthZUtils trait FilteredShowObjectsExec extends LeafExecNode { - def delegated: SparkPlan + def result: Array[InternalRow] - final override def output: Seq[Attribute] = delegated.output - - final private lazy val result = { - delegated.executeCollect().filter(isAllowed(_, AuthZUtils.getAuthzUgi(sparkContext))) - } + override def output: Seq[Attribute] final override def doExecute(): RDD[InternalRow] = { sparkContext.parallelize(result, 1) } +} - protected def isAllowed(r: InternalRow, ugi: UserGroupInformation): Boolean +trait FilteredShowObjectsCheck { + def isAllowed(r: InternalRow, ugi: UserGroupInformation): Boolean } -case class FilteredShowNamespaceExec(delegated: SparkPlan) extends FilteredShowObjectsExec { +case class FilteredShowNamespaceExec(result: Array[InternalRow], output: Seq[Attribute]) + extends FilteredShowObjectsExec {} +object FilteredShowNamespaceExec extends FilteredShowObjectsCheck { + def apply(delegated: SparkPlan, sc: SparkContext): FilteredShowNamespaceExec = { + val result = delegated.executeCollect() + .filter(isAllowed(_, AuthZUtils.getAuthzUgi(sc))) + new FilteredShowNamespaceExec(result, delegated.output) + } - override protected def isAllowed(r: InternalRow, ugi: UserGroupInformation): Boolean = { + override def isAllowed(r: InternalRow, ugi: UserGroupInformation): Boolean = { val database = r.getString(0) val resource = AccessResource(ObjectType.DATABASE, database, null, null) val request = AccessRequest(resource, ugi, OperationType.SHOWDATABASES, AccessType.USE) @@ -52,8 +58,16 @@ case class FilteredShowNamespaceExec(delegated: SparkPlan) extends FilteredShowO } } -case class FilteredShowTablesExec(delegated: SparkPlan) extends FilteredShowObjectsExec { - override protected def isAllowed(r: InternalRow, ugi: UserGroupInformation): Boolean = { +case class FilteredShowTablesExec(result: Array[InternalRow], output: Seq[Attribute]) + extends FilteredShowObjectsExec {} +object FilteredShowTablesExec extends FilteredShowObjectsCheck { + def apply(delegated: SparkPlan, sc: SparkContext): FilteredShowNamespaceExec = { + val result = delegated.executeCollect() + .filter(isAllowed(_, AuthZUtils.getAuthzUgi(sc))) + new FilteredShowNamespaceExec(result, delegated.output) + } + + override def isAllowed(r: InternalRow, ugi: UserGroupInformation): Boolean = { val database = r.getString(0) val table = r.getString(1) val isTemp = r.getBoolean(2) diff --git a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RangerSparkExtension.scala b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RangerSparkExtension.scala index f4dcb3f9fdf..f8e941d9def 100644 --- a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RangerSparkExtension.scala +++ b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RangerSparkExtension.scala @@ -19,6 +19,8 @@ package org.apache.kyuubi.plugin.spark.authz.ranger import org.apache.spark.sql.SparkSessionExtensions +import org.apache.kyuubi.plugin.spark.authz.ranger.datamasking.{RuleApplyDataMaskingStage0, RuleApplyDataMaskingStage1} +import org.apache.kyuubi.plugin.spark.authz.ranger.rowfilter.RuleApplyRowFilter import org.apache.kyuubi.plugin.spark.authz.util.{RuleEliminateMarker, RuleEliminateViewMarker} /** @@ -36,13 +38,15 @@ import org.apache.kyuubi.plugin.spark.authz.util.{RuleEliminateMarker, RuleElimi * @since 1.6.0 */ class RangerSparkExtension extends (SparkSessionExtensions => Unit) { - SparkRangerAdminPlugin.init() + SparkRangerAdminPlugin.initialize() override def apply(v1: SparkSessionExtensions): Unit = { v1.injectCheckRule(AuthzConfigurationChecker) v1.injectResolutionRule(_ => new RuleReplaceShowObjectCommands()) v1.injectResolutionRule(_ => new RuleApplyPermanentViewMarker()) - v1.injectResolutionRule(new RuleApplyRowFilterAndDataMasking(_)) + v1.injectResolutionRule(RuleApplyRowFilter) + v1.injectResolutionRule(RuleApplyDataMaskingStage0) + v1.injectResolutionRule(RuleApplyDataMaskingStage1) v1.injectOptimizerRule(_ => new RuleEliminateMarker()) v1.injectOptimizerRule(new RuleAuthorization(_)) v1.injectOptimizerRule(_ => new RuleEliminateViewMarker()) diff --git a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RuleApplyRowFilterAndDataMasking.scala b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RuleApplyRowFilterAndDataMasking.scala deleted file mode 100644 index b6961c92459..00000000000 --- a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RuleApplyRowFilterAndDataMasking.scala +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.kyuubi.plugin.spark.authz.ranger - -import org.apache.spark.sql.SparkSession -import org.apache.spark.sql.catalyst.expressions.Alias -import org.apache.spark.sql.catalyst.plans.logical.{Filter, LogicalPlan, Project} -import org.apache.spark.sql.catalyst.rules.Rule - -import org.apache.kyuubi.plugin.spark.authz.ObjectType -import org.apache.kyuubi.plugin.spark.authz.serde._ -import org.apache.kyuubi.plugin.spark.authz.util.{PermanentViewMarker, RowFilterAndDataMaskingMarker} -import org.apache.kyuubi.plugin.spark.authz.util.AuthZUtils._ - -class RuleApplyRowFilterAndDataMasking(spark: SparkSession) extends Rule[LogicalPlan] { - private def mapChildren(plan: LogicalPlan)(f: LogicalPlan => LogicalPlan): LogicalPlan = { - val newChildren = plan match { - case cmd if isKnownTableCommand(cmd) => - val tableCommandSpec = getTableCommandSpec(cmd) - val queries = tableCommandSpec.queries(cmd) - cmd.children.map { - case c if queries.contains(c) => f(c) - case other => other - } - case _ => - plan.children.map(f) - } - plan.withNewChildren(newChildren) - } - - override def apply(plan: LogicalPlan): LogicalPlan = { - mapChildren(plan) { - case p: RowFilterAndDataMaskingMarker => p - case scan if isKnownScan(scan) && scan.resolved => - val tables = getScanSpec(scan).tables(scan, spark) - tables.headOption.map(applyFilterAndMasking(scan, _)).getOrElse(scan) - case other => apply(other) - } - } - - private def applyFilterAndMasking( - plan: LogicalPlan, - table: Table): LogicalPlan = { - val ugi = getAuthzUgi(spark.sparkContext) - val opType = operationType(plan) - val parse = spark.sessionState.sqlParser.parseExpression _ - val are = AccessResource(ObjectType.TABLE, table.database.orNull, table.table, null) - val art = AccessRequest(are, ugi, opType, AccessType.SELECT) - val filterExprStr = SparkRangerAdminPlugin.getFilterExpr(art) - val newOutput = plan.output.map { attr => - val are = - AccessResource(ObjectType.COLUMN, table.database.orNull, table.table, attr.name) - val art = AccessRequest(are, ugi, opType, AccessType.SELECT) - val maskExprStr = SparkRangerAdminPlugin.getMaskingExpr(art) - if (maskExprStr.isEmpty) { - attr - } else { - val maskExpr = parse(maskExprStr.get) - plan match { - case _: PermanentViewMarker => - Alias(maskExpr, attr.name)(exprId = attr.exprId) - case _ => - Alias(maskExpr, attr.name)() - } - } - } - - if (filterExprStr.isEmpty) { - Project(newOutput, RowFilterAndDataMaskingMarker(plan)) - } else { - val filterExpr = parse(filterExprStr.get) - Project(newOutput, Filter(filterExpr, RowFilterAndDataMaskingMarker(plan))) - } - } -} diff --git a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RuleAuthorization.scala b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RuleAuthorization.scala index 1c73acc492e..3d53174f3e6 100644 --- a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RuleAuthorization.scala +++ b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RuleAuthorization.scala @@ -27,16 +27,15 @@ import org.apache.spark.sql.catalyst.trees.TreeNodeTag import org.apache.kyuubi.plugin.spark.authz._ import org.apache.kyuubi.plugin.spark.authz.ObjectType._ -import org.apache.kyuubi.plugin.spark.authz.ranger.RuleAuthorization.KYUUBI_AUTHZ_TAG +import org.apache.kyuubi.plugin.spark.authz.ranger.RuleAuthorization._ import org.apache.kyuubi.plugin.spark.authz.ranger.SparkRangerAdminPlugin._ -import org.apache.kyuubi.plugin.spark.authz.util.AuthZUtils._; +import org.apache.kyuubi.plugin.spark.authz.util.AuthZUtils._ class RuleAuthorization(spark: SparkSession) extends Rule[LogicalPlan] { - override def apply(plan: LogicalPlan): LogicalPlan = plan match { - case p if !plan.getTagValue(KYUUBI_AUTHZ_TAG).contains(true) => - RuleAuthorization.checkPrivileges(spark, p) - p.setTagValue(KYUUBI_AUTHZ_TAG, true) - p - case p => p // do nothing if checked privileges already. + override def apply(plan: LogicalPlan): LogicalPlan = { + plan match { + case plan if isAuthChecked(plan) => plan // do nothing if checked privileges already. + case p => checkPrivileges(spark, p) + } } } @@ -44,7 +43,7 @@ object RuleAuthorization { val KYUUBI_AUTHZ_TAG = TreeNodeTag[Boolean]("__KYUUBI_AUTHZ_TAG") - def checkPrivileges(spark: SparkSession, plan: LogicalPlan): Unit = { + private def checkPrivileges(spark: SparkSession, plan: LogicalPlan): LogicalPlan = { val auditHandler = new SparkRangerAuditHandler val ugi = getAuthzUgi(spark.sparkContext) val (inputs, outputs, opType) = PrivilegesBuilder.build(plan, spark) @@ -94,5 +93,17 @@ object RuleAuthorization { verify(Seq(req), auditHandler) } } + markAuthChecked(plan) + } + + private def markAuthChecked(plan: LogicalPlan): LogicalPlan = { + plan.transformUp { case p => + p.setTagValue(KYUUBI_AUTHZ_TAG, true) + p + } + } + + private def isAuthChecked(plan: LogicalPlan): Boolean = { + plan.find(_.getTagValue(KYUUBI_AUTHZ_TAG).contains(true)).nonEmpty } } diff --git a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RuleHelper.scala b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RuleHelper.scala new file mode 100644 index 00000000000..3cfe2b9406b --- /dev/null +++ b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RuleHelper.scala @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.plugin.spark.authz.ranger + +import org.apache.hadoop.security.UserGroupInformation +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.expressions.Expression +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.catalyst.rules.Rule + +import org.apache.kyuubi.plugin.spark.authz.serde.{getTableCommandSpec, isKnownTableCommand} +import org.apache.kyuubi.plugin.spark.authz.util.AuthZUtils + +trait RuleHelper extends Rule[LogicalPlan] { + + def spark: SparkSession + + final protected val parse: String => Expression = spark.sessionState.sqlParser.parseExpression _ + + protected def mapChildren(plan: LogicalPlan)(f: LogicalPlan => LogicalPlan): LogicalPlan = { + val newChildren = plan match { + case cmd if isKnownTableCommand(cmd) => + val tableCommandSpec = getTableCommandSpec(cmd) + val queries = tableCommandSpec.queries(cmd) + cmd.children.map { + case c if queries.contains(c) => f(c) + case other => other + } + case _ => + plan.children.map(f) + } + plan.withNewChildren(newChildren) + } + + def ugi: UserGroupInformation = AuthZUtils.getAuthzUgi(spark.sparkContext) + +} diff --git a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/SparkRangerAdminPlugin.scala b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/SparkRangerAdminPlugin.scala index 7ece55fe535..78e59ff897f 100644 --- a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/SparkRangerAdminPlugin.scala +++ b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/SparkRangerAdminPlugin.scala @@ -18,11 +18,12 @@ package org.apache.kyuubi.plugin.spark.authz.ranger import scala.collection.JavaConverters._ -import scala.collection.mutable.ArrayBuffer -import scala.collection.mutable.LinkedHashMap +import scala.collection.mutable.{ArrayBuffer, LinkedHashMap} +import org.apache.hadoop.util.ShutdownHookManager import org.apache.ranger.plugin.policyengine.RangerAccessRequest import org.apache.ranger.plugin.service.RangerBasePlugin +import org.slf4j.LoggerFactory import org.apache.kyuubi.plugin.spark.authz.AccessControlException import org.apache.kyuubi.plugin.spark.authz.util.AuthZUtils._ @@ -30,6 +31,7 @@ import org.apache.kyuubi.plugin.spark.authz.util.RangerConfigProvider object SparkRangerAdminPlugin extends RangerBasePlugin("spark", "sparkSql") with RangerConfigProvider { + final private val LOG = LoggerFactory.getLogger(getClass) /** * For a Spark SQL query, it may contain 0 or more privilege objects to verify, e.g. a typical @@ -60,6 +62,29 @@ object SparkRangerAdminPlugin extends RangerBasePlugin("spark", "sparkSql") s"ranger.plugin.$getServiceType.use.usergroups.from.userstore.enabled", false) + /** + * plugin initialization + * with cleanup shutdown hook registered + */ + def initialize(): Unit = { + this.init() + registerCleanupShutdownHook(this) + } + + /** + * register shutdown hook for plugin cleanup + */ + private def registerCleanupShutdownHook(plugin: RangerBasePlugin): Unit = { + ShutdownHookManager.get().addShutdownHook( + () => { + if (plugin != null) { + LOG.info(s"clean up ranger plugin, appId: ${plugin.getAppId}") + this.cleanup() + } + }, + Integer.MAX_VALUE) + } + def getFilterExpr(req: AccessRequest): Option[String] = { val result = evalRowFilterPolicies(req, null) Option(result) diff --git a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/datamasking/DataMaskingStage0Marker.scala b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/datamasking/DataMaskingStage0Marker.scala new file mode 100644 index 00000000000..b4314938324 --- /dev/null +++ b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/datamasking/DataMaskingStage0Marker.scala @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.plugin.spark.authz.ranger.datamasking + +import org.apache.spark.sql.catalyst.expressions.{Attribute, ExprId} +import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, UnaryNode} + +import org.apache.kyuubi.plugin.spark.authz.util.WithInternalChild +case class DataMaskingStage0Marker(child: LogicalPlan, scan: LogicalPlan) + extends UnaryNode with WithInternalChild { + + def exprToMaskers(): Map[ExprId, Attribute] = { + scan.output.map(_.exprId).zip(child.output).flatMap { case (id, expr) => + if (id == expr.exprId) None else Some(id -> expr) + }.toMap + } + + override def output: Seq[Attribute] = child.output + + override def withNewChildInternal(newChild: LogicalPlan): LogicalPlan = copy(child = newChild) + +} diff --git a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/datamasking/DataMaskingStage1Marker.scala b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/datamasking/DataMaskingStage1Marker.scala new file mode 100644 index 00000000000..aed0ac693b1 --- /dev/null +++ b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/datamasking/DataMaskingStage1Marker.scala @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.plugin.spark.authz.ranger.datamasking + +import org.apache.spark.sql.catalyst.expressions.Attribute +import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, UnaryNode} + +import org.apache.kyuubi.plugin.spark.authz.util.WithInternalChild + +case class DataMaskingStage1Marker(child: LogicalPlan) extends UnaryNode with WithInternalChild { + + override def output: Seq[Attribute] = child.output + + override def withNewChildInternal(newChild: LogicalPlan): LogicalPlan = copy(child = newChild) + +} diff --git a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/datamasking/RuleApplyDataMaskingStage0.scala b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/datamasking/RuleApplyDataMaskingStage0.scala new file mode 100644 index 00000000000..de125550ac9 --- /dev/null +++ b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/datamasking/RuleApplyDataMaskingStage0.scala @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.plugin.spark.authz.ranger.datamasking + +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.expressions.Alias +import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project} + +import org.apache.kyuubi.plugin.spark.authz.ObjectType +import org.apache.kyuubi.plugin.spark.authz.OperationType.QUERY +import org.apache.kyuubi.plugin.spark.authz.ranger._ +import org.apache.kyuubi.plugin.spark.authz.serde._ + +/** + * The full data masking rule contains two separate stages. + * + * Step1: RuleApplyDataMaskingStage0 + * - lookup the full plan for supported scans + * - once found, get masker configuration from external column by column + * - use spark sql parser to generate an unresolved expression for each masker + * - add a projection with new output on the right top of the original scan if the output has + * changed + * - Add DataMaskingStage0Marker to track the original expression and its masker expression. + * + * Step2: Spark native rules will resolve our newly added maskers + * + * Step3: [[RuleApplyDataMaskingStage1]] + */ +case class RuleApplyDataMaskingStage0(spark: SparkSession) extends RuleHelper { + + override def apply(plan: LogicalPlan): LogicalPlan = { + val newPlan = mapChildren(plan) { + case p: DataMaskingStage0Marker => p + case p: DataMaskingStage1Marker => p + case scan if isKnownScan(scan) && scan.resolved => + val tables = getScanSpec(scan).tables(scan, spark) + tables.headOption.map(applyMasking(scan, _)).getOrElse(scan) + case other => apply(other) + } + newPlan + } + + private def applyMasking( + plan: LogicalPlan, + table: Table): LogicalPlan = { + val newOutput = plan.output.map { attr => + val are = + AccessResource(ObjectType.COLUMN, table.database.orNull, table.table, attr.name) + val art = AccessRequest(are, ugi, QUERY, AccessType.SELECT) + val maskExprStr = SparkRangerAdminPlugin.getMaskingExpr(art) + maskExprStr.map(parse).map(Alias(_, attr.name)()).getOrElse(attr) + } + if (newOutput == plan.output) { + plan + } else { + DataMaskingStage0Marker(Project(newOutput, plan), plan) + } + } +} diff --git a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/datamasking/RuleApplyDataMaskingStage1.scala b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/datamasking/RuleApplyDataMaskingStage1.scala new file mode 100644 index 00000000000..9589be2e97b --- /dev/null +++ b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/datamasking/RuleApplyDataMaskingStage1.scala @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.plugin.spark.authz.ranger.datamasking + +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.expressions.NamedExpression +import org.apache.spark.sql.catalyst.plans.logical.{Command, LogicalPlan} + +import org.apache.kyuubi.plugin.spark.authz.ranger.RuleHelper +import org.apache.kyuubi.plugin.spark.authz.serde._ + +/** + * See [[RuleApplyDataMaskingStage0]] also. + * + * This is the second step for data masking. It will fulfill the missing attributes that + * have a related masker expression buffered by DataMaskingStage0Marker. + */ +case class RuleApplyDataMaskingStage1(spark: SparkSession) extends RuleHelper { + + override def apply(plan: LogicalPlan): LogicalPlan = { + + plan match { + case marker0: DataMaskingStage0Marker => marker0 + case marker1: DataMaskingStage1Marker => marker1 + case cmd if isKnownTableCommand(cmd) => + val tableCommandSpec = getTableCommandSpec(cmd) + val queries = tableCommandSpec.queries(cmd) + cmd.mapChildren { + case marker0: DataMaskingStage0Marker => marker0 + case marker1: DataMaskingStage1Marker => marker1 + case query if queries.contains(query) && query.resolved => + applyDataMasking(query) + case o => o + } + case cmd: Command if cmd.childrenResolved => + cmd.mapChildren(applyDataMasking) + case cmd: Command => cmd + case other if other.resolved => applyDataMasking(other) + case other => other + } + } + + private def applyDataMasking(plan: LogicalPlan): LogicalPlan = { + assert(plan.resolved, "the current masking approach relies on a resolved plan") + def replaceOriginExprWithMasker(plan: LogicalPlan): LogicalPlan = plan match { + case m: DataMaskingStage0Marker => m + case m: DataMaskingStage1Marker => m + case p => + val maskerExprs = p.collect { + case marker: DataMaskingStage0Marker if marker.resolved => marker.exprToMaskers() + }.flatten.toMap + if (maskerExprs.isEmpty) { + p + } else { + val t = p.transformExpressionsUp { + case e: NamedExpression => maskerExprs.getOrElse(e.exprId, e) + } + t.withNewChildren(t.children.map(replaceOriginExprWithMasker)) + } + } + val newPlan = replaceOriginExprWithMasker(plan) + + if (newPlan == plan) { + plan + } else { + DataMaskingStage1Marker(newPlan) + } + } +} diff --git a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/util/RowFilterAndDataMaskingMarker.scala b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/rowfilter/RowFilterMarker.scala similarity index 80% rename from extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/util/RowFilterAndDataMaskingMarker.scala rename to extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/rowfilter/RowFilterMarker.scala index 357e9bfc2a5..8817958b585 100644 --- a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/util/RowFilterAndDataMaskingMarker.scala +++ b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/rowfilter/RowFilterMarker.scala @@ -15,17 +15,17 @@ * limitations under the License. */ -package org.apache.kyuubi.plugin.spark.authz.util +package org.apache.kyuubi.plugin.spark.authz.ranger.rowfilter import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, UnaryNode} -case class RowFilterAndDataMaskingMarker(child: LogicalPlan) extends UnaryNode - with WithInternalChild { +import org.apache.kyuubi.plugin.spark.authz.util.WithInternalChild + +case class RowFilterMarker(child: LogicalPlan) extends UnaryNode with WithInternalChild { override def output: Seq[Attribute] = child.output - override def withNewChildInternal(newChild: LogicalPlan): LogicalPlan = - copy(child = newChild) + override def withNewChildInternal(newChild: LogicalPlan): LogicalPlan = copy(child = newChild) } diff --git a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/rowfilter/RuleApplyRowFilter.scala b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/rowfilter/RuleApplyRowFilter.scala new file mode 100644 index 00000000000..22bcfae49d9 --- /dev/null +++ b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/rowfilter/RuleApplyRowFilter.scala @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.plugin.spark.authz.ranger.rowfilter + +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.plans.logical.{Filter, LogicalPlan} + +import org.apache.kyuubi.plugin.spark.authz.ObjectType +import org.apache.kyuubi.plugin.spark.authz.OperationType.QUERY +import org.apache.kyuubi.plugin.spark.authz.ranger._ +import org.apache.kyuubi.plugin.spark.authz.serde._ + +case class RuleApplyRowFilter(spark: SparkSession) extends RuleHelper { + + override def apply(plan: LogicalPlan): LogicalPlan = { + val newPlan = mapChildren(plan) { + case p: RowFilterMarker => p + case scan if isKnownScan(scan) && scan.resolved => + val tables = getScanSpec(scan).tables(scan, spark) + tables.headOption.map(applyFilter(scan, _)).getOrElse(scan) + case other => apply(other) + } + newPlan + } + + private def applyFilter( + plan: LogicalPlan, + table: Table): LogicalPlan = { + val are = AccessResource(ObjectType.TABLE, table.database.orNull, table.table, null) + val art = AccessRequest(are, ugi, QUERY, AccessType.SELECT) + val filterExpr = SparkRangerAdminPlugin.getFilterExpr(art).map(parse) + val filtered = filterExpr.foldLeft(plan)((p, expr) => Filter(expr, RowFilterMarker(p))) + filtered + } +} diff --git a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/serde/CommandSpec.scala b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/serde/CommandSpec.scala index d72d789324e..e96ef8cbfd6 100644 --- a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/serde/CommandSpec.scala +++ b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/serde/CommandSpec.scala @@ -85,7 +85,7 @@ case class TableCommandSpec( qd.extract(plan) } catch { case e: Exception => - LOG.warn(qd.error(plan, e)) + LOG.debug(qd.error(plan, e)) None } } @@ -102,7 +102,7 @@ case class ScanSpec( td.extract(plan, spark) } catch { case e: Exception => - LOG.warn(td.error(plan, e)) + LOG.debug(td.error(plan, e)) None } } diff --git a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/util/RuleEliminateMarker.scala b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/util/RuleEliminateMarker.scala index d2da7257096..448439b8426 100644 --- a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/util/RuleEliminateMarker.scala +++ b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/util/RuleEliminateMarker.scala @@ -17,11 +17,25 @@ package org.apache.kyuubi.plugin.spark.authz.util +import org.apache.spark.sql.catalyst.expressions.SubqueryExpression import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.kyuubi.plugin.spark.authz.ranger.datamasking.{DataMaskingStage0Marker, DataMaskingStage1Marker} +import org.apache.kyuubi.plugin.spark.authz.ranger.rowfilter.RowFilterMarker + class RuleEliminateMarker extends Rule[LogicalPlan] { override def apply(plan: LogicalPlan): LogicalPlan = { - plan.transformUp { case rf: RowFilterAndDataMaskingMarker => rf.child } + plan.transformUp { case p => + p.transformExpressionsUp { + case p: SubqueryExpression => + p.withNewPlan(apply(p.plan)) + } match { + case marker0: DataMaskingStage0Marker => marker0.child + case marker1: DataMaskingStage1Marker => marker1.child + case rf: RowFilterMarker => rf.child + case other => other + } + } } } diff --git a/extensions/spark/kyuubi-spark-authz/src/test/resources/sparkSql_hive_jenkins.json b/extensions/spark/kyuubi-spark-authz/src/test/resources/sparkSql_hive_jenkins.json index b5b069c463a..250df2ddc59 100644 --- a/extensions/spark/kyuubi-spark-authz/src/test/resources/sparkSql_hive_jenkins.json +++ b/extensions/spark/kyuubi-spark-authz/src/test/resources/sparkSql_hive_jenkins.json @@ -280,7 +280,8 @@ "values": [ "default", "spark_catalog", - "iceberg_ns" + "iceberg_ns", + "ns1" ], "isExcludes": false, "isRecursive": false @@ -900,7 +901,9 @@ "database": { "values": [ "default", - "spark_catalog" + "spark_catalog", + "iceberg_ns", + "ns1" ], "isExcludes": false, "isRecursive": false @@ -1148,6 +1151,67 @@ "guid": "b3f1f1e0-2bd6-4b20-8a32-a531006ae151", "isEnabled": true, "version": 1 + }, + { + "service": "hive_jenkins", + "name": "someone_access_perm_view", + "policyType": 0, + "policyPriority": 0, + "description": "", + "isAuditEnabled": true, + "resources": { + "database": { + "values": [ + "default" + ], + "isExcludes": false, + "isRecursive": false + }, + "column": { + "values": [ + "*" + ], + "isExcludes": false, + "isRecursive": false + }, + "table": { + "values": [ + "perm_view" + ], + "isExcludes": false, + "isRecursive": false + } + }, + "policyItems": [ + { + "accesses": [ + { + "type": "select", + "isAllowed": true + } + ], + "users": [ + "user_perm_view_only" + ], + "groups": [], + "conditions": [], + "delegateAdmin": false + } + ], + "denyPolicyItems": [], + "allowExceptions": [], + "denyExceptions": [], + "dataMaskPolicyItems": [], + "rowFilterPolicyItems": [], + "options": {}, + "validitySchedules": [], + "policyLabels": [ + "" + ], + "id": 123, + "guid": "2fb6099d-e421-41df-9d24-f2f47bed618e", + "isEnabled": true, + "version": 5 } ], "serviceDef": { diff --git a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/IcebergCatalogPrivilegesBuilderSuite.scala b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/IcebergCatalogPrivilegesBuilderSuite.scala index d89d0696feb..81397038920 100644 --- a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/IcebergCatalogPrivilegesBuilderSuite.scala +++ b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/IcebergCatalogPrivilegesBuilderSuite.scala @@ -26,7 +26,7 @@ import org.apache.kyuubi.plugin.spark.authz.ranger.AccessType class IcebergCatalogPrivilegesBuilderSuite extends V2CommandsPrivilegesSuite { override protected val catalogImpl: String = "hive" override protected val sqlExtensions: String = - if (isSparkV32OrGreater) { + if (isSparkV31OrGreater) { "org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions" } else "" override protected def format = "iceberg" @@ -38,7 +38,7 @@ class IcebergCatalogPrivilegesBuilderSuite extends V2CommandsPrivilegesSuite { override protected val supportsPartitionManagement = false override def beforeAll(): Unit = { - if (isSparkV32OrGreater) { + if (isSparkV31OrGreater) { spark.conf.set( s"spark.sql.catalog.$catalogV2", "org.apache.iceberg.spark.SparkCatalog") @@ -51,7 +51,7 @@ class IcebergCatalogPrivilegesBuilderSuite extends V2CommandsPrivilegesSuite { } override def withFixture(test: NoArgTest): Outcome = { - assume(isSparkV32OrGreater) + assume(isSparkV31OrGreater) test() } diff --git a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/PrivilegesBuilderSuite.scala b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/PrivilegesBuilderSuite.scala index 15f58deb309..43929091769 100644 --- a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/PrivilegesBuilderSuite.scala +++ b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/PrivilegesBuilderSuite.scala @@ -143,7 +143,7 @@ abstract class PrivilegesBuilderSuite extends AnyFunSuite val (in, out, operationType) = PrivilegesBuilder.build(plan, spark) assert(operationType === ALTERTABLE_RENAME) assert(in.isEmpty) - assert(out.size === 2) + assert(out.size === 1) out.foreach { po => assert(po.privilegeObjectType === PrivilegeObjectType.TABLE_OR_VIEW) assert(po.catalog.isEmpty) @@ -151,10 +151,7 @@ abstract class PrivilegesBuilderSuite extends AnyFunSuite assert(Set(oldTableShort, "efg").contains(po.objectName)) assert(po.columns.isEmpty) val accessType = ranger.AccessType(po, operationType, isInput = false) - assert(Set(AccessType.CREATE, AccessType.DROP).contains(accessType)) - if (accessType == AccessType.DROP) { - checkTableOwner(po) - } + assert(accessType == AccessType.ALTER) } } } @@ -1648,6 +1645,48 @@ class HiveCatalogPrivilegeBuilderSuite extends PrivilegesBuilderSuite { val accessType = ranger.AccessType(po, operationType, isInput = false) assert(accessType === AccessType.CREATE) } + + test("KYUUBI #4532: Displays the columns involved in extracting the aggregation operator") { + // case1: There is no project operator involving all columns. + val plan1 = sql(s"SELECT COUNT(key), MAX(value) FROM $reusedPartTable GROUP BY pid") + .queryExecution.optimizedPlan + val (in1, out1, _) = PrivilegesBuilder.build(plan1, spark) + assert(in1.size === 1) + assert(out1.isEmpty) + val pi1 = in1.head + assert(pi1.columns.size === 3) + assert(pi1.columns === Seq("key", "value", "pid")) + + // case2: Some columns are involved, and the group column is not selected. + val plan2 = sql(s"SELECT COUNT(key) FROM $reusedPartTable GROUP BY pid") + .queryExecution.optimizedPlan + val (in2, out2, _) = PrivilegesBuilder.build(plan2, spark) + assert(in2.size === 1) + assert(out2.isEmpty) + val pi2 = in2.head + assert(pi2.columns.size === 2) + assert(pi2.columns === Seq("key", "pid")) + + // case3: Some columns are involved, and the group column is selected. + val plan3 = sql(s"SELECT COUNT(key), pid FROM $reusedPartTable GROUP BY pid") + .queryExecution.optimizedPlan + val (in3, out3, _) = PrivilegesBuilder.build(plan3, spark) + assert(in3.size === 1) + assert(out3.isEmpty) + val pi3 = in3.head + assert(pi3.columns.size === 2) + assert(pi3.columns === Seq("key", "pid")) + + // case4: HAVING & GROUP clause + val plan4 = sql(s"SELECT COUNT(key) FROM $reusedPartTable GROUP BY pid HAVING MAX(key) > 1000") + .queryExecution.optimizedPlan + val (in4, out4, _) = PrivilegesBuilder.build(plan4, spark) + assert(in4.size === 1) + assert(out4.isEmpty) + val pi4 = in4.head + assert(pi4.columns.size === 2) + assert(pi4.columns === Seq("key", "pid")) + } } case class SimpleInsert(userSpecifiedSchema: StructType)(@transient val sparkSession: SparkSession) diff --git a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/SparkSessionProvider.scala b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/SparkSessionProvider.scala index 0ab88917b6d..ce8d6bc0ccf 100644 --- a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/SparkSessionProvider.scala +++ b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/SparkSessionProvider.scala @@ -22,7 +22,8 @@ import java.security.PrivilegedExceptionAction import org.apache.hadoop.security.UserGroupInformation import org.apache.spark.SparkConf -import org.apache.spark.sql.{DataFrame, SparkSession, SparkSessionExtensions} +import org.apache.spark.sql.{DataFrame, Row, SparkSession, SparkSessionExtensions} +import org.scalatest.Assertions.convertToEqualizer import org.apache.kyuubi.Utils import org.apache.kyuubi.plugin.spark.authz.util.AuthZUtils._ @@ -71,4 +72,32 @@ trait SparkSessionProvider { protected val sql: String => DataFrame = spark.sql + protected def doAs[T](user: String, f: => T): T = { + UserGroupInformation.createRemoteUser(user).doAs[T]( + new PrivilegedExceptionAction[T] { + override def run(): T = f + }) + } + protected def withCleanTmpResources[T](res: Seq[(String, String)])(f: => T): T = { + try { + f + } finally { + res.foreach { + case (t, "table") => doAs("admin", sql(s"DROP TABLE IF EXISTS $t")) + case (db, "database") => doAs("admin", sql(s"DROP DATABASE IF EXISTS $db")) + case (fn, "function") => doAs("admin", sql(s"DROP FUNCTION IF EXISTS $fn")) + case (view, "view") => doAs("admin", sql(s"DROP VIEW IF EXISTS $view")) + case (cacheTable, "cache") => if (isSparkV32OrGreater) { + doAs("admin", sql(s"UNCACHE TABLE IF EXISTS $cacheTable")) + } + case (_, e) => + throw new RuntimeException(s"the resource whose resource type is $e cannot be cleared") + } + } + } + + protected def checkAnswer(user: String, query: String, result: Seq[Row]): Unit = { + doAs(user, assert(sql(query).collect() === result)) + } + } diff --git a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/V2CommandsPrivilegesSuite.scala b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/V2CommandsPrivilegesSuite.scala index 9d3e6d42df4..dede8142693 100644 --- a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/V2CommandsPrivilegesSuite.scala +++ b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/V2CommandsPrivilegesSuite.scala @@ -515,6 +515,24 @@ abstract class V2CommandsPrivilegesSuite extends PrivilegesBuilderSuite { assert(accessType === AccessType.UPDATE) } + test("DescribeTable") { + val plan = executePlan(s"DESCRIBE TABLE $catalogTable").analyzed + val (inputs, outputs, operationType) = PrivilegesBuilder.build(plan, spark) + assert(operationType === DESCTABLE) + assert(inputs.size === 1) + val po = inputs.head + assert(po.actionType === PrivilegeObjectActionType.OTHER) + assert(po.privilegeObjectType === PrivilegeObjectType.TABLE_OR_VIEW) + assert(po.catalog === Some(catalogV2)) + assert(po.dbname === namespace) + assert(po.objectName === catalogTableShort) + assert(po.columns.isEmpty) + checkV2TableOwner(po) + val accessType = AccessType(po, operationType, isInput = true) + assert(accessType === AccessType.SELECT) + assert(outputs.size === 0) + } + // with V2AlterTableCommand test("AddColumns") { diff --git a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/gen/TableCommands.scala b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/gen/TableCommands.scala index ef981515a47..a8b8121e2b0 100644 --- a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/gen/TableCommands.scala +++ b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/gen/TableCommands.scala @@ -102,7 +102,6 @@ object TableCommands { val AlterTableRename = { val cmd = "org.apache.spark.sql.execution.command.AlterTableRenameCommand" - val actionTypeDesc = ActionTypeDesc(actionType = Some(DELETE)) val oldTableTableTypeDesc = TableTypeDesc( @@ -112,12 +111,9 @@ object TableCommands { val oldTableD = TableDesc( "oldName", tite, - tableTypeDesc = Some(oldTableTableTypeDesc), - actionTypeDesc = Some(actionTypeDesc)) + tableTypeDesc = Some(oldTableTableTypeDesc)) - val newTableD = - TableDesc("newName", tite, tableTypeDesc = Some(oldTableTableTypeDesc)) - TableCommandSpec(cmd, Seq(oldTableD, newTableD), ALTERTABLE_RENAME) + TableCommandSpec(cmd, Seq(oldTableD), ALTERTABLE_RENAME) } // this is for spark 3.1 or below @@ -350,6 +346,13 @@ object TableCommands { TableCommandSpec(cmd, Nil, CREATEVIEW) } + val CreateTable = { + val cmd = "org.apache.spark.sql.execution.datasources.CreateTable" + val tableDesc = TableDesc("tableDesc", classOf[CatalogTableTableExtractor]) + val queryDesc = QueryDesc("query", "LogicalPlanOptionQueryExtractor") + TableCommandSpec(cmd, Seq(tableDesc), CREATETABLE, queryDescs = Seq(queryDesc)) + } + val CreateDataSourceTable = { val cmd = "org.apache.spark.sql.execution.command.CreateDataSourceTableCommand" val tableDesc = TableDesc("table", classOf[CatalogTableTableExtractor]) @@ -410,6 +413,16 @@ object TableCommands { TableCommandSpec(cmd, Seq(tableDesc), DESCTABLE) } + val DescribeRelationTable = { + val cmd = "org.apache.spark.sql.catalyst.plans.logical.DescribeRelation" + val tableDesc = TableDesc( + "relation", + classOf[ResolvedTableTableExtractor], + isInput = true, + setCurrentDatabaseIfMissing = true) + TableCommandSpec(cmd, Seq(tableDesc), DESCTABLE) + } + val DropTable = { val cmd = "org.apache.spark.sql.execution.command.DropTableCommand" val tableTypeDesc = @@ -601,6 +614,7 @@ object TableCommands { CreateHiveTableAsSelect, CreateHiveTableAsSelect.copy(classname = "org.apache.spark.sql.hive.execution.OptimizedCreateHiveTableAsSelectCommand"), + CreateTable, CreateTableLike, CreateTableV2, CreateTableV2.copy(classname = @@ -614,6 +628,7 @@ object TableCommands { DeleteFromTable, DescribeColumn, DescribeTable, + DescribeRelationTable, DropTable, DropTableV2, InsertIntoDataSource, diff --git a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/IcebergCatalogRangerSparkExtensionSuite.scala b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/IcebergCatalogRangerSparkExtensionSuite.scala index a2634bb2672..6b1cedf786f 100644 --- a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/IcebergCatalogRangerSparkExtensionSuite.scala +++ b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/IcebergCatalogRangerSparkExtensionSuite.scala @@ -19,6 +19,8 @@ package org.apache.kyuubi.plugin.spark.authz.ranger // scalastyle:off import scala.util.Try +import org.scalatest.Outcome + import org.apache.kyuubi.Utils import org.apache.kyuubi.plugin.spark.authz.AccessControlException @@ -29,7 +31,7 @@ import org.apache.kyuubi.plugin.spark.authz.AccessControlException class IcebergCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSuite { override protected val catalogImpl: String = "hive" override protected val sqlExtensions: String = - if (isSparkV32OrGreater) + if (isSparkV31OrGreater) "org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions" else "" @@ -38,8 +40,13 @@ class IcebergCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSuite val table1 = "table1" val outputTable1 = "outputTable1" + override def withFixture(test: NoArgTest): Outcome = { + assume(isSparkV31OrGreater) + test() + } + override def beforeAll(): Unit = { - if (isSparkV32OrGreater) { + if (isSparkV31OrGreater) { spark.conf.set( s"spark.sql.catalog.$catalogV2", "org.apache.iceberg.spark.SparkCatalog") @@ -74,8 +81,6 @@ class IcebergCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSuite } test("[KYUUBI #3515] MERGE INTO") { - assume(isSparkV32OrGreater) - val mergeIntoSql = s""" |MERGE INTO $catalogV2.$namespace1.$outputTable1 AS target @@ -115,8 +120,6 @@ class IcebergCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSuite } test("[KYUUBI #3515] UPDATE TABLE") { - assume(isSparkV32OrGreater) - // UpdateTable val e1 = intercept[AccessControlException]( doAs( @@ -133,8 +136,6 @@ class IcebergCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSuite } test("[KYUUBI #3515] DELETE FROM TABLE") { - assume(isSparkV32OrGreater) - // DeleteFromTable val e6 = intercept[AccessControlException]( doAs("someone", sql(s"DELETE FROM $catalogV2.$namespace1.$table1 WHERE id=2"))) @@ -145,8 +146,6 @@ class IcebergCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSuite } test("[KYUUBI #3666] Support {OWNER} variable for queries run on CatalogV2") { - assume(isSparkV32OrGreater) - val table = "owner_variable" val select = s"SELECT key FROM $catalogV2.$namespace1.$table" @@ -222,4 +221,11 @@ class IcebergCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSuite }) } } + + test("[KYUUBI #4255] DESCRIBE TABLE") { + val e1 = intercept[AccessControlException]( + doAs("someone", sql(s"DESCRIBE TABLE $catalogV2.$namespace1.$table1").explain())) + assert(e1.getMessage.contains(s"does not have [select] privilege" + + s" on [$namespace1/$table1]")) + } } diff --git a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RangerLocalClient.scala b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RangerLocalClient.scala index 323bd524a95..d7473a58065 100644 --- a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RangerLocalClient.scala +++ b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RangerLocalClient.scala @@ -17,22 +17,25 @@ package org.apache.kyuubi.plugin.spark.authz.ranger -import java.io.InputStreamReader +import java.text.SimpleDateFormat -import com.google.gson.GsonBuilder +import com.fasterxml.jackson.databind.DeserializationFeature +import com.fasterxml.jackson.databind.json.JsonMapper import org.apache.ranger.admin.client.RangerAdminRESTClient import org.apache.ranger.plugin.util.ServicePolicies class RangerLocalClient extends RangerAdminRESTClient with RangerClientHelper { - private val g = - new GsonBuilder().setDateFormat("yyyyMMdd-HH:mm:ss.SSS-Z").setPrettyPrinting().create + private val mapper = new JsonMapper() + .setDateFormat(new SimpleDateFormat("yyyyMMdd-HH:mm:ss.SSS-Z")) + .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false) + private val policies: ServicePolicies = { val loader = Thread.currentThread().getContextClassLoader val inputStream = { loader.getResourceAsStream("sparkSql_hive_jenkins.json") } - g.fromJson(new InputStreamReader(inputStream), classOf[ServicePolicies]) + mapper.readValue(inputStream, classOf[ServicePolicies]) } override def getServicePoliciesIfUpdated( diff --git a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RangerSparkExtensionSuite.scala b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RangerSparkExtensionSuite.scala index 8f95a3f9f3a..4ccf15cba98 100644 --- a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RangerSparkExtensionSuite.scala +++ b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RangerSparkExtensionSuite.scala @@ -17,14 +17,10 @@ package org.apache.kyuubi.plugin.spark.authz.ranger -import java.security.PrivilegedExceptionAction -import java.sql.Timestamp - import scala.util.Try -import org.apache.commons.codec.digest.DigestUtils import org.apache.hadoop.security.UserGroupInformation -import org.apache.spark.sql.{Row, SparkSessionExtensions} +import org.apache.spark.sql.SparkSessionExtensions import org.apache.spark.sql.catalyst.analysis.NoSuchTableException import org.apache.spark.sql.catalyst.catalog.HiveTableRelation import org.apache.spark.sql.catalyst.plans.logical.Statistics @@ -43,13 +39,6 @@ abstract class RangerSparkExtensionSuite extends AnyFunSuite // scalastyle:on override protected val extension: SparkSessionExtensions => Unit = new RangerSparkExtension - protected def doAs[T](user: String, f: => T): T = { - UserGroupInformation.createRemoteUser(user).doAs[T]( - new PrivilegedExceptionAction[T] { - override def run(): T = f - }) - } - override def afterAll(): Unit = { spark.stop() super.afterAll() @@ -62,24 +51,6 @@ abstract class RangerSparkExtensionSuite extends AnyFunSuite s"Permission denied: user [$user] does not have [$privilege] privilege on [$resource]" } - protected def withCleanTmpResources[T](res: Seq[(String, String)])(f: => T): T = { - try { - f - } finally { - res.foreach { - case (t, "table") => doAs("admin", sql(s"DROP TABLE IF EXISTS $t")) - case (db, "database") => doAs("admin", sql(s"DROP DATABASE IF EXISTS $db")) - case (fn, "function") => doAs("admin", sql(s"DROP FUNCTION IF EXISTS $fn")) - case (view, "view") => doAs("admin", sql(s"DROP VIEW IF EXISTS $view")) - case (cacheTable, "cache") => if (isSparkV32OrGreater) { - doAs("admin", sql(s"UNCACHE TABLE IF EXISTS $cacheTable")) - } - case (_, e) => - throw new RuntimeException(s"the resource whose resource type is $e cannot be cleared") - } - } - } - /** * Drops temporary view `viewNames` after calling `f`. */ @@ -247,212 +218,6 @@ abstract class RangerSparkExtensionSuite extends AnyFunSuite doAs("admin", assert(Try(sql(create0)).isSuccess)) } - test("row level filter") { - val db = "default" - val table = "src" - val col = "key" - val create = s"CREATE TABLE IF NOT EXISTS $db.$table ($col int, value int) USING $format" - - withCleanTmpResources(Seq((s"$db.${table}2", "table"), (s"$db.$table", "table"))) { - doAs("admin", assert(Try { sql(create) }.isSuccess)) - doAs("admin", sql(s"INSERT INTO $db.$table SELECT 1, 1")) - doAs("admin", sql(s"INSERT INTO $db.$table SELECT 20, 2")) - doAs("admin", sql(s"INSERT INTO $db.$table SELECT 30, 3")) - - doAs( - "kent", - assert(sql(s"SELECT key FROM $db.$table order by key").collect() === - Seq(Row(1), Row(20), Row(30)))) - - Seq( - s"SELECT value FROM $db.$table", - s"SELECT value as key FROM $db.$table", - s"SELECT max(value) FROM $db.$table", - s"SELECT coalesce(max(value), 1) FROM $db.$table", - s"SELECT value FROM $db.$table WHERE value in (SELECT value as key FROM $db.$table)") - .foreach { q => - doAs( - "bob", { - withClue(q) { - assert(sql(q).collect() === Seq(Row(1))) - } - }) - } - doAs( - "bob", { - sql(s"CREATE TABLE $db.src2 using $format AS SELECT value FROM $db.$table") - assert(sql(s"SELECT value FROM $db.${table}2").collect() === Seq(Row(1))) - }) - } - } - - test("[KYUUBI #3581]: row level filter on permanent view") { - assume(isSparkV31OrGreater) - - val db = "default" - val table = "src" - val permView = "perm_view" - val col = "key" - val create = s"CREATE TABLE IF NOT EXISTS $db.$table ($col int, value int) USING $format" - val createView = - s"CREATE OR REPLACE VIEW $db.$permView" + - s" AS SELECT * FROM $db.$table" - - withCleanTmpResources(Seq( - (s"$db.$table", "table"), - (s"$db.$permView", "view"))) { - doAs("admin", assert(Try { sql(create) }.isSuccess)) - doAs("admin", assert(Try { sql(createView) }.isSuccess)) - doAs("admin", sql(s"INSERT INTO $db.$table SELECT 1, 1")) - doAs("admin", sql(s"INSERT INTO $db.$table SELECT 20, 2")) - doAs("admin", sql(s"INSERT INTO $db.$table SELECT 30, 3")) - - Seq( - s"SELECT value FROM $db.$permView", - s"SELECT value as key FROM $db.$permView", - s"SELECT max(value) FROM $db.$permView", - s"SELECT coalesce(max(value), 1) FROM $db.$permView", - s"SELECT value FROM $db.$permView WHERE value in (SELECT value as key FROM $db.$permView)") - .foreach { q => - doAs( - "perm_view_user", { - withClue(q) { - assert(sql(q).collect() === Seq(Row(1))) - } - }) - } - } - } - - test("data masking") { - val db = "default" - val table = "src" - val col = "key" - val create = - s"CREATE TABLE IF NOT EXISTS $db.$table" + - s" ($col int, value1 int, value2 string, value3 string, value4 timestamp, value5 string)" + - s" USING $format" - - withCleanTmpResources(Seq( - (s"$db.${table}2", "table"), - (s"$db.$table", "table"))) { - doAs("admin", assert(Try { sql(create) }.isSuccess)) - doAs( - "admin", - sql( - s"INSERT INTO $db.$table SELECT 1, 1, 'hello', 'world', " + - s"timestamp'2018-11-17 12:34:56', 'World'")) - doAs( - "admin", - sql( - s"INSERT INTO $db.$table SELECT 20, 2, 'kyuubi', 'y', " + - s"timestamp'2018-11-17 12:34:56', 'world'")) - doAs( - "admin", - sql( - s"INSERT INTO $db.$table SELECT 30, 3, 'spark', 'a'," + - s" timestamp'2018-11-17 12:34:56', 'world'")) - - doAs( - "kent", - assert(sql(s"SELECT key FROM $db.$table order by key").collect() === - Seq(Row(1), Row(20), Row(30)))) - - Seq( - s"SELECT value1, value2, value3, value4, value5 FROM $db.$table", - s"SELECT value1 as key, value2, value3, value4, value5 FROM $db.$table", - s"SELECT max(value1), max(value2), max(value3), max(value4), max(value5) FROM $db.$table", - s"SELECT coalesce(max(value1), 1), coalesce(max(value2), 1), coalesce(max(value3), 1), " + - s"coalesce(max(value4), timestamp '2018-01-01 22:33:44'), coalesce(max(value5), 1) " + - s"FROM $db.$table", - s"SELECT value1, value2, value3, value4, value5 FROM $db.$table WHERE value2 in" + - s" (SELECT value2 as key FROM $db.$table)") - .foreach { q => - doAs( - "bob", { - withClue(q) { - assert(sql(q).collect() === - Seq( - Row( - DigestUtils.md5Hex("1"), - "xxxxx", - "worlx", - Timestamp.valueOf("2018-01-01 00:00:00"), - "Xorld"))) - } - }) - } - doAs( - "bob", { - sql(s"CREATE TABLE $db.src2 using $format AS SELECT value1 FROM $db.$table") - assert(sql(s"SELECT value1 FROM $db.${table}2").collect() === - Seq(Row(DigestUtils.md5Hex("1")))) - }) - } - } - - test("[KYUUBI #3581]: data masking on permanent view") { - assume(isSparkV31OrGreater) - - val db = "default" - val table = "src" - val permView = "perm_view" - val col = "key" - val create = - s"CREATE TABLE IF NOT EXISTS $db.$table" + - s" ($col int, value1 int, value2 string)" + - s" USING $format" - - val createView = - s"CREATE OR REPLACE VIEW $db.$permView" + - s" AS SELECT * FROM $db.$table" - - withCleanTmpResources(Seq( - (s"$db.$table", "table"), - (s"$db.$permView", "view"))) { - doAs("admin", assert(Try { sql(create) }.isSuccess)) - doAs("admin", assert(Try { sql(createView) }.isSuccess)) - doAs( - "admin", - sql( - s"INSERT INTO $db.$table SELECT 1, 1, 'hello'")) - - Seq( - s"SELECT value1, value2 FROM $db.$permView") - .foreach { q => - doAs( - "perm_view_user", { - withClue(q) { - assert(sql(q).collect() === - Seq( - Row( - DigestUtils.md5Hex("1"), - "hello"))) - } - }) - } - } - } - - test("KYUUBI #2390: RuleEliminateMarker stays in analyze phase for data masking") { - val db = "default" - val table = "src" - val create = - s"CREATE TABLE IF NOT EXISTS $db.$table (key int, value1 int) USING $format" - - withCleanTmpResources(Seq((s"$db.$table", "table"))) { - doAs("admin", sql(create)) - doAs("admin", sql(s"INSERT INTO $db.$table SELECT 1, 1")) - // scalastyle: off - doAs( - "bob", { - assert(sql(s"select * from $db.$table").collect() === - Seq(Row(1, DigestUtils.md5Hex("1")))) - assert(Try(sql(s"select * from $db.$table").show(1)).isSuccess) - }) - } - } - test("show tables") { val db = "default2" val table = "src" @@ -468,6 +233,7 @@ abstract class RangerSparkExtensionSuite extends AnyFunSuite doAs("admin", assert(sql(s"show tables from $db").collect().length === 2)) doAs("bob", assert(sql(s"show tables from $db").collect().length === 0)) doAs("i_am_invisible", assert(sql(s"show tables from $db").collect().length === 0)) + doAs("i_am_invisible", assert(sql(s"show tables from $db").limit(1).isEmpty)) } } @@ -482,6 +248,7 @@ abstract class RangerSparkExtensionSuite extends AnyFunSuite doAs("bob", assert(sql(s"SHOW DATABASES").collect().length == 1)) doAs("bob", assert(sql(s"SHOW DATABASES").collectAsList().get(0).getString(0) == "default")) + doAs("i_am_invisible", assert(sql(s"SHOW DATABASES").limit(1).isEmpty)) } } @@ -680,7 +447,6 @@ class InMemoryCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSuite class HiveCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSuite { override protected val catalogImpl: String = "hive" - test("table stats must be specified") { val table = "hive_src" withCleanTmpResources(Seq((table, "table"))) { @@ -757,30 +523,64 @@ class HiveCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSuite { } test("[KYUUBI #3326] check persisted view and skip shadowed table") { + val db1 = "default" val table = "hive_src" val permView = "perm_view" - val db1 = "default" - val db2 = "db2" withCleanTmpResources(Seq( (s"$db1.$table", "table"), - (s"$db2.$permView", "view"), - (db2, "database"))) { - doAs("admin", sql(s"CREATE TABLE IF NOT EXISTS $db1.$table (id int)")) - - doAs("admin", sql(s"CREATE DATABASE IF NOT EXISTS $db2")) - doAs("admin", sql(s"CREATE VIEW $db2.$permView AS SELECT * FROM $table")) + (s"$db1.$permView", "view"))) { + doAs("admin", sql(s"CREATE TABLE IF NOT EXISTS $db1.$table (id int, name string)")) + doAs("admin", sql(s"CREATE VIEW $db1.$permView AS SELECT * FROM $db1.$table")) + // KYUUBI #3326: with no privileges to the permanent view or the source table val e1 = intercept[AccessControlException]( - doAs("someone", sql(s"select * from $db2.$permView")).show(0)) + doAs( + "someone", { + sql(s"select * from $db1.$permView").collect() + })) if (isSparkV31OrGreater) { - assert(e1.getMessage.contains(s"does not have [select] privilege on [$db2/$permView/id]")) + assert(e1.getMessage.contains(s"does not have [select] privilege on [$db1/$permView/id]")) } else { assert(e1.getMessage.contains(s"does not have [select] privilege on [$db1/$table/id]")) } } } + test("KYUUBI #4504: query permanent view with privilege to permanent view only") { + val db1 = "default" + val table = "hive_src" + val permView = "perm_view" + val userPermViewOnly = "user_perm_view_only" + + withCleanTmpResources(Seq( + (s"$db1.$table", "table"), + (s"$db1.$permView", "view"))) { + doAs("admin", sql(s"CREATE TABLE IF NOT EXISTS $db1.$table (id int, name string)")) + doAs("admin", sql(s"CREATE VIEW $db1.$permView AS SELECT * FROM $db1.$table")) + + // query all columns of the permanent view + // with access privileges to the permanent view but no privilege to the source table + val sql1 = s"SELECT * FROM $db1.$permView" + if (isSparkV31OrGreater) { + doAs(userPermViewOnly, { sql(sql1).collect() }) + } else { + val e1 = intercept[AccessControlException](doAs(userPermViewOnly, { sql(sql1).collect() })) + assert(e1.getMessage.contains(s"does not have [select] privilege on [$db1/$table/id]")) + } + + // query the second column of permanent view with multiple columns + // with access privileges to the permanent view but no privilege to the source table + val sql2 = s"SELECT name FROM $db1.$permView" + if (isSparkV31OrGreater) { + doAs(userPermViewOnly, { sql(sql2).collect() }) + } else { + val e2 = intercept[AccessControlException](doAs(userPermViewOnly, { sql(sql2).collect() })) + assert(e2.getMessage.contains(s"does not have [select] privilege on [$db1/$table/name]")) + } + } + } + test("[KYUUBI #3371] support throws all disallowed privileges in exception") { val db1 = "default" val srcTable1 = "hive_src1" diff --git a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/V2JdbcTableCatalogRangerSparkExtensionSuite.scala b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/V2JdbcTableCatalogRangerSparkExtensionSuite.scala index 6bdab9d9d7b..73a13bc1c3c 100644 --- a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/V2JdbcTableCatalogRangerSparkExtensionSuite.scala +++ b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/V2JdbcTableCatalogRangerSparkExtensionSuite.scala @@ -104,7 +104,15 @@ class V2JdbcTableCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSu val e1 = intercept[AccessControlException]( doAs("someone", sql(s"select city, id from $catalogV2.$namespace1.$table1").explain())) assert(e1.getMessage.contains(s"does not have [select] privilege" + - s" on [$namespace1/$table1/id]")) + s" on [$namespace1/$table1/city]")) + } + + test("[KYUUBI #4255] DESCRIBE TABLE") { + assume(isSparkV31OrGreater) + val e1 = intercept[AccessControlException]( + doAs("someone", sql(s"DESCRIBE TABLE $catalogV2.$namespace1.$table1").explain())) + assert(e1.getMessage.contains(s"does not have [select] privilege" + + s" on [$namespace1/$table1]")) } test("[KYUUBI #3424] CREATE TABLE") { diff --git a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/datamasking/DataMaskingForHiveHiveParquetSuite.scala b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/datamasking/DataMaskingForHiveHiveParquetSuite.scala new file mode 100644 index 00000000000..ccc694f9b13 --- /dev/null +++ b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/datamasking/DataMaskingForHiveHiveParquetSuite.scala @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.plugin.spark.authz.ranger.datamasking + +class DataMaskingForHiveHiveParquetSuite extends DataMaskingTestBase { + override protected val catalogImpl: String = "hive" + override protected def format: String = "USING hive OPTIONS(fileFormat='parquet')" +} diff --git a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/datamasking/DataMaskingForHiveParquetSuite.scala b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/datamasking/DataMaskingForHiveParquetSuite.scala new file mode 100644 index 00000000000..ba254abbd3d --- /dev/null +++ b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/datamasking/DataMaskingForHiveParquetSuite.scala @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.plugin.spark.authz.ranger.datamasking + +class DataMaskingForHiveParquetSuite extends DataMaskingTestBase { + override protected val catalogImpl: String = "hive" + override protected def format: String = "USING parquet" +} diff --git a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/datamasking/DataMaskingForIcebergSuite.scala b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/datamasking/DataMaskingForIcebergSuite.scala new file mode 100644 index 00000000000..99b7eb97300 --- /dev/null +++ b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/datamasking/DataMaskingForIcebergSuite.scala @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.plugin.spark.authz.ranger.datamasking + +import org.apache.spark.SparkConf +import org.scalatest.Outcome + +import org.apache.kyuubi.Utils + +class DataMaskingForIcebergSuite extends DataMaskingTestBase { + override protected val extraSparkConf: SparkConf = { + val conf = new SparkConf() + + if (isSparkV31OrGreater) { + conf + .set("spark.sql.defaultCatalog", "testcat") + .set( + "spark.sql.catalog.testcat", + "org.apache.iceberg.spark.SparkCatalog") + .set(s"spark.sql.catalog.testcat.type", "hadoop") + .set( + "spark.sql.catalog.testcat.warehouse", + Utils.createTempDir("iceberg-hadoop").toString) + } + conf + + } + + override protected val catalogImpl: String = "in-memory" + + override protected def format: String = "USING iceberg" + + override def beforeAll(): Unit = { + if (isSparkV31OrGreater) { + super.beforeAll() + } + } + + override def afterAll(): Unit = { + if (isSparkV31OrGreater) { + super.afterAll() + } + } + + override def withFixture(test: NoArgTest): Outcome = { + assume(isSparkV31OrGreater) + test() + } +} diff --git a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/datamasking/DataMaskingForInMemoryParquetSuite.scala b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/datamasking/DataMaskingForInMemoryParquetSuite.scala new file mode 100644 index 00000000000..1bfb71e79ba --- /dev/null +++ b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/datamasking/DataMaskingForInMemoryParquetSuite.scala @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.plugin.spark.authz.ranger.datamasking + +class DataMaskingForInMemoryParquetSuite extends DataMaskingTestBase { + + override protected val catalogImpl: String = "in-memory" + override protected def format: String = "USING parquet" +} diff --git a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/datamasking/DataMaskingForJDBCV2Suite.scala b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/datamasking/DataMaskingForJDBCV2Suite.scala new file mode 100644 index 00000000000..894daeaf711 --- /dev/null +++ b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/datamasking/DataMaskingForJDBCV2Suite.scala @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.plugin.spark.authz.ranger.datamasking +import java.sql.DriverManager + +import scala.util.Try + +import org.apache.spark.SparkConf +import org.scalatest.Outcome + +class DataMaskingForJDBCV2Suite extends DataMaskingTestBase { + override protected val extraSparkConf: SparkConf = { + val conf = new SparkConf() + if (isSparkV31OrGreater) { + conf + .set("spark.sql.defaultCatalog", "testcat") + .set( + "spark.sql.catalog.testcat", + "org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog") + .set(s"spark.sql.catalog.testcat.url", "jdbc:derby:memory:testcat;create=true") + .set( + s"spark.sql.catalog.testcat.driver", + "org.apache.derby.jdbc.AutoloadedDriver") + } + conf + } + + override protected val catalogImpl: String = "in-memory" + + override protected def format: String = "" + + override def beforeAll(): Unit = { + if (isSparkV31OrGreater) super.beforeAll() + } + + override def afterAll(): Unit = { + if (isSparkV31OrGreater) { + super.afterAll() + // cleanup db + Try { + DriverManager.getConnection(s"jdbc:derby:memory:testcat;shutdown=true") + } + } + } + + override def withFixture(test: NoArgTest): Outcome = { + assume(isSparkV31OrGreater) + test() + } +} diff --git a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/datamasking/DataMaskingTestBase.scala b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/datamasking/DataMaskingTestBase.scala new file mode 100644 index 00000000000..3585397c6fa --- /dev/null +++ b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/datamasking/DataMaskingTestBase.scala @@ -0,0 +1,263 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.plugin.spark.authz.ranger.datamasking + +// scalastyle:off +import java.sql.Timestamp + +import scala.util.Try + +import org.apache.commons.codec.digest.DigestUtils.md5Hex +import org.apache.spark.sql.{Row, SparkSessionExtensions} +import org.scalatest.BeforeAndAfterAll +import org.scalatest.funsuite.AnyFunSuite + +import org.apache.kyuubi.plugin.spark.authz.SparkSessionProvider +import org.apache.kyuubi.plugin.spark.authz.ranger.RangerSparkExtension + +/** + * Base trait for data masking tests, derivative classes shall name themselves following: + * DataMaskingFor CatalogImpl? FileFormat? Additions? Suite + */ +trait DataMaskingTestBase extends AnyFunSuite with SparkSessionProvider with BeforeAndAfterAll { +// scalastyle:on + override protected val extension: SparkSessionExtensions => Unit = new RangerSparkExtension + + private def setup(): Unit = { + sql(s"CREATE TABLE IF NOT EXISTS default.src" + + "(key int," + + " value1 int," + + " value2 string," + + " value3 string," + + " value4 timestamp," + + " value5 string)" + + s" $format") + + // NOTICE: `bob` has a row filter `key < 20` + sql("INSERT INTO default.src " + + "SELECT 1, 1, 'hello', 'world', timestamp'2018-11-17 12:34:56', 'World'") + sql("INSERT INTO default.src " + + "SELECT 20, 2, 'kyuubi', 'y', timestamp'2018-11-17 12:34:56', 'world'") + sql("INSERT INTO default.src " + + "SELECT 30, 3, 'spark', 'a', timestamp'2018-11-17 12:34:56', 'world'") + sql(s"CREATE TABLE default.unmasked $format AS SELECT * FROM default.src") + } + + private def cleanup(): Unit = { + sql("DROP TABLE IF EXISTS default.src") + sql("DROP TABLE IF EXISTS default.unmasked") + } + + override def beforeAll(): Unit = { + doAs("admin", setup()) + super.beforeAll() + } + override def afterAll(): Unit = { + doAs("admin", cleanup()) + spark.stop + super.afterAll() + } + + test("simple query with a user doesn't have mask rules") { + checkAnswer("kent", "SELECT key FROM default.src order by key", Seq(Row(1), Row(20), Row(30))) + } + + test("simple query with a user has mask rules") { + val result = + Seq(Row(md5Hex("1"), "xxxxx", "worlx", Timestamp.valueOf("2018-01-01 00:00:00"), "Xorld")) + checkAnswer("bob", "SELECT value1, value2, value3, value4, value5 FROM default.src", result) + checkAnswer( + "bob", + "SELECT value1 as key, value2, value3, value4, value5 FROM default.src", + result) + } + + test("star") { + val result = + Seq(Row(1, md5Hex("1"), "xxxxx", "worlx", Timestamp.valueOf("2018-01-01 00:00:00"), "Xorld")) + checkAnswer("bob", "SELECT * FROM default.src", result) + } + + test("simple udf") { + val result = + Seq(Row(md5Hex("1"), "xxxxx", "worlx", Timestamp.valueOf("2018-01-01 00:00:00"), "Xorld")) + checkAnswer( + "bob", + "SELECT max(value1), max(value2), max(value3), max(value4), max(value5) FROM default.src", + result) + } + + test("complex udf") { + val result = + Seq(Row(md5Hex("1"), "xxxxx", "worlx", Timestamp.valueOf("2018-01-01 00:00:00"), "Xorld")) + checkAnswer( + "bob", + "SELECT coalesce(max(value1), 1), coalesce(max(value2), 1), coalesce(max(value3), 1), " + + "coalesce(max(value4), timestamp '2018-01-01 22:33:44'), coalesce(max(value5), 1) " + + "FROM default.src", + result) + } + + test("in subquery") { + val result = + Seq(Row(md5Hex("1"), "xxxxx", "worlx", Timestamp.valueOf("2018-01-01 00:00:00"), "Xorld")) + checkAnswer( + "bob", + "SELECT value1, value2, value3, value4, value5 FROM default.src WHERE value2 in " + + "(SELECT value2 as key FROM default.src)", + result) + } + + test("create a unmasked table as select from a masked one") { + withCleanTmpResources(Seq(("default.src2", "table"))) { + doAs("bob", sql(s"CREATE TABLE default.src2 $format AS SELECT value1 FROM default.src")) + checkAnswer("bob", "SELECT value1 FROM default.src2", Seq(Row(md5Hex("1")))) + } + } + + test("insert into a unmasked table from a masked one") { + withCleanTmpResources(Seq(("default.src2", "table"), ("default.src3", "table"))) { + doAs("bob", sql(s"CREATE TABLE default.src2 (value1 string) $format")) + doAs("bob", sql(s"INSERT INTO default.src2 SELECT value1 from default.src")) + doAs("bob", sql(s"INSERT INTO default.src2 SELECT value1 as v from default.src")) + checkAnswer("bob", "SELECT value1 FROM default.src2", Seq(Row(md5Hex("1")), Row(md5Hex("1")))) + doAs("bob", sql(s"CREATE TABLE default.src3 (k int, value string) $format")) + doAs("bob", sql(s"INSERT INTO default.src3 SELECT key, value1 from default.src")) + doAs("bob", sql(s"INSERT INTO default.src3 SELECT key, value1 as v from default.src")) + checkAnswer("bob", "SELECT value FROM default.src3", Seq(Row(md5Hex("1")), Row(md5Hex("1")))) + } + } + + test("join on an unmasked table") { + val s = "SELECT a.value1, b.value1 FROM default.src a" + + " join default.unmasked b on a.value1=b.value1" + checkAnswer("bob", s, Nil) + checkAnswer("bob", s, Nil) // just for testing query multiple times, don't delete it + } + + test("self join on a masked table") { + val s = "SELECT a.value1, b.value1 FROM default.src a" + + " join default.src b on a.value1=b.value1" + checkAnswer("bob", s, Seq(Row(md5Hex("1"), md5Hex("1")))) + // just for testing query multiple times, don't delete it + checkAnswer("bob", s, Seq(Row(md5Hex("1"), md5Hex("1")))) + } + + test("self join on a masked table and filter the masked column with original value") { + val s = "SELECT a.value1, b.value1 FROM default.src a" + + " join default.src b on a.value1=b.value1" + + " where a.value1='1' and b.value1='1'" + checkAnswer("bob", s, Nil) + checkAnswer("bob", s, Nil) // just for testing query multiple times, don't delete it + } + + test("self join on a masked table and filter the masked column with masked value") { + // scalastyle:off + val s = "SELECT a.value1, b.value1 FROM default.src a" + + " join default.src b on a.value1=b.value1" + + s" where a.value1='${md5Hex("1")}' and b.value1='${md5Hex("1")}'" + // TODO: The v1 an v2 relations generate different implicit type cast rules for filters + // so the bellow test failed in derivative classes that us v2 data source, e.g., DataMaskingForIcebergSuite + // For the issue itself, we might need check the spark logic first + // DataMaskingStage1Marker Project [value1#178, value1#183] + // +- Project [value1#178, value1#183] + // +- Filter ((cast(value1#178 as int) = cast(c4ca4238a0b923820dcc509a6f75849b as int)) AND (cast(value1#183 as int) = cast(c4ca4238a0b923820dcc509a6f75849b as int))) + // +- Join Inner, (value1#178 = value1#183) + // :- SubqueryAlias a + // : +- SubqueryAlias testcat.default.src + // : +- Filter (key#166 < 20) + // : +- RowFilterMarker + // : +- DataMaskingStage0Marker RelationV2[key#166, value1#167, value2#168, value3#169, value4#170, value5#171] default.src + // : +- Project [key#166, md5(cast(cast(value1#167 as string) as binary)) AS value1#178, regexp_replace(regexp_replace(regexp_replace(value2#168, [A-Z], X, 1), [a-z], x, 1), [0-9], n, 1) AS value2#179, regexp_replace(regexp_replace(regexp_replace(value3#169, [A-Z], X, 5), [a-z], x, 5), [0-9], n, 5) AS value3#180, date_trunc(YEAR, value4#170, Some(Asia/Shanghai)) AS value4#181, concat(regexp_replace(regexp_replace(regexp_replace(left(value5#171, (length(value5#171) - 4)), [A-Z], X, 1), [a-z], x, 1), [0-9], n, 1), right(value5#171, 4)) AS value5#182] + // : +- RelationV2[key#166, value1#167, value2#168, value3#169, value4#170, value5#171] default.src + // +- SubqueryAlias b + // +- SubqueryAlias testcat.default.src + // +- Filter (key#172 < 20) + // +- RowFilterMarker + // +- DataMaskingStage0Marker RelationV2[key#172, value1#173, value2#174, value3#175, value4#176, value5#177] default.src + // +- Project [key#172, md5(cast(cast(value1#173 as string) as binary)) AS value1#183, regexp_replace(regexp_replace(regexp_replace(value2#174, [A-Z], X, 1), [a-z], x, 1), [0-9], n, 1) AS value2#184, regexp_replace(regexp_replace(regexp_replace(value3#175, [A-Z], X, 5), [a-z], x, 5), [0-9], n, 5) AS value3#185, date_trunc(YEAR, value4#176, Some(Asia/Shanghai)) AS value4#186, concat(regexp_replace(regexp_replace(regexp_replace(left(value5#177, (length(value5#177) - 4)), [A-Z], X, 1), [a-z], x, 1), [0-9], n, 1), right(value5#177, 4)) AS value5#187] + // +- RelationV2[key#172, value1#173, value2#174, value3#175, value4#176, value5#177] default.src + // + // + // Project [value1#143, value1#148] + // +- Filter ((value1#143 = c4ca4238a0b923820dcc509a6f75849b) AND (value1#148 = c4ca4238a0b923820dcc509a6f75849b)) + // +- Join Inner, (value1#143 = value1#148) + // :- SubqueryAlias a + // : +- SubqueryAlias spark_catalog.default.src + // : +- Filter (key#60 < 20) + // : +- RowFilterMarker + // : +- DataMaskingStage0Marker Relation default.src[key#60,value1#61,value2#62,value3#63,value4#64,value5#65] parquet + // : +- Project [key#60, md5(cast(cast(value1#61 as string) as binary)) AS value1#143, regexp_replace(regexp_replace(regexp_replace(value2#62, [A-Z], X, 1), [a-z], x, 1), [0-9], n, 1) AS value2#144, regexp_replace(regexp_replace(regexp_replace(value3#63, [A-Z], X, 5), [a-z], x, 5), [0-9], n, 5) AS value3#145, date_trunc(YEAR, value4#64, Some(Asia/Shanghai)) AS value4#146, concat(regexp_replace(regexp_replace(regexp_replace(left(value5#65, (length(value5#65) - 4)), [A-Z], X, 1), [a-z], x, 1), [0-9], n, 1), right(value5#65, 4)) AS value5#147] + // : +- Relation default.src[key#60,value1#61,value2#62,value3#63,value4#64,value5#65] parquet + // +- SubqueryAlias b + // +- SubqueryAlias spark_catalog.default.src + // +- Filter (key#153 < 20) + // +- RowFilterMarker + // +- DataMaskingStage0Marker Relation default.src[key#60,value1#61,value2#62,value3#63,value4#64,value5#65] parquet + // +- Project [key#153, md5(cast(cast(value1#154 as string) as binary)) AS value1#148, regexp_replace(regexp_replace(regexp_replace(value2#155, [A-Z], X, 1), [a-z], x, 1), [0-9], n, 1) AS value2#149, regexp_replace(regexp_replace(regexp_replace(value3#156, [A-Z], X, 5), [a-z], x, 5), [0-9], n, 5) AS value3#150, date_trunc(YEAR, value4#157, Some(Asia/Shanghai)) AS value4#151, concat(regexp_replace(regexp_replace(regexp_replace(left(value5#158, (length(value5#158) - 4)), [A-Z], X, 1), [a-z], x, 1), [0-9], n, 1), right(value5#158, 4)) AS value5#152] + // +- Relation default.src[key#153,value1#154,value2#155,value3#156,value4#157,value5#158] parquet + // checkAnswer("bob", s, Seq(Row(md5Hex("1"), md5Hex("1")))) + // + // + // scalastyle:on + + // So here we use value2 to avoid type casting + val s2 = "SELECT a.value1, b.value1 FROM default.src a" + + " join default.src b on a.value1=b.value1" + + s" where a.value2='xxxxx' and b.value2='xxxxx'" + checkAnswer("bob", s2, Seq(Row(md5Hex("1"), md5Hex("1")))) + // just for testing query multiple times, don't delete it + checkAnswer("bob", s2, Seq(Row(md5Hex("1"), md5Hex("1")))) + } + + test("union an unmasked table") { + val s = """ + SELECT value1 from ( + SELECT a.value1 FROM default.src a + union + (SELECT b.value1 FROM default.unmasked b) + ) c order by value1 + """ + checkAnswer("bob", s, Seq(Row("1"), Row("2"), Row("3"), Row(md5Hex("1")))) + } + + test("union a masked table") { + val s = "SELECT a.value1 FROM default.src a union" + + " (SELECT b.value1 FROM default.src b)" + checkAnswer("bob", s, Seq(Row(md5Hex("1")))) + } + + test("KYUUBI #3581: permanent view should lookup rule on itself not the raw table") { + assume(isSparkV31OrGreater) + val supported = doAs( + "perm_view_user", + Try(sql("CREATE OR REPLACE VIEW default.perm_view AS SELECT * FROM default.src")).isSuccess) + assume(supported, s"view support for '$format' has not been implemented yet") + + withCleanTmpResources(Seq(("default.perm_view", "view"))) { + checkAnswer( + "perm_view_user", + "SELECT value1, value2 FROM default.src where key < 20", + Seq(Row(1, "hello"))) + checkAnswer( + "perm_view_user", + "SELECT value1, value2 FROM default.perm_view where key < 20", + Seq(Row(md5Hex("1"), "hello"))) + } + } +} diff --git a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/rowfiltering/RowFilteringForHiveHiveParquetSuite.scala b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/rowfiltering/RowFilteringForHiveHiveParquetSuite.scala new file mode 100644 index 00000000000..142a2f82508 --- /dev/null +++ b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/rowfiltering/RowFilteringForHiveHiveParquetSuite.scala @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.plugin.spark.authz.ranger.rowfiltering + +class RowFilteringForHiveHiveParquetSuite extends RowFilteringTestBase { + override protected val catalogImpl: String = "hive" + override protected def format: String = "USING hive OPTIONS(fileFormat='parquet')" +} diff --git a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/rowfiltering/RowFilteringForHiveParquetSuite.scala b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/rowfiltering/RowFilteringForHiveParquetSuite.scala new file mode 100644 index 00000000000..9727643cf93 --- /dev/null +++ b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/rowfiltering/RowFilteringForHiveParquetSuite.scala @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.plugin.spark.authz.ranger.rowfiltering + +class RowFilteringForHiveParquetSuite extends RowFilteringTestBase { + override protected val catalogImpl: String = "hive" + override protected def format: String = "USING parquet" +} diff --git a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/rowfiltering/RowFilteringForIcebergSuite.scala b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/rowfiltering/RowFilteringForIcebergSuite.scala new file mode 100644 index 00000000000..2120b195221 --- /dev/null +++ b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/rowfiltering/RowFilteringForIcebergSuite.scala @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.plugin.spark.authz.ranger.rowfiltering + +import org.apache.spark.SparkConf +import org.scalatest.Outcome + +import org.apache.kyuubi.Utils +class RowFilteringForIcebergSuite extends RowFilteringTestBase { + override protected val extraSparkConf: SparkConf = { + val conf = new SparkConf() + + if (isSparkV31OrGreater) { + conf + .set("spark.sql.defaultCatalog", "testcat") + .set( + "spark.sql.catalog.testcat", + "org.apache.iceberg.spark.SparkCatalog") + .set(s"spark.sql.catalog.testcat.type", "hadoop") + .set( + "spark.sql.catalog.testcat.warehouse", + Utils.createTempDir("iceberg-hadoop").toString) + } + conf + + } + + override protected val catalogImpl: String = "in-memory" + + override protected def format: String = "USING iceberg" + + override def beforeAll(): Unit = { + if (isSparkV31OrGreater) { + super.beforeAll() + } + } + + override def afterAll(): Unit = { + if (isSparkV31OrGreater) { + super.afterAll() + } + } + + override def withFixture(test: NoArgTest): Outcome = { + assume(isSparkV31OrGreater) + test() + } +} diff --git a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/rowfiltering/RowFilteringForInMemoryParquetSuite.scala b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/rowfiltering/RowFilteringForInMemoryParquetSuite.scala new file mode 100644 index 00000000000..9baaa2a3166 --- /dev/null +++ b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/rowfiltering/RowFilteringForInMemoryParquetSuite.scala @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.plugin.spark.authz.ranger.rowfiltering + +class RowFilteringForInMemoryParquetSuite extends RowFilteringTestBase { + + override protected val catalogImpl: String = "in-memory" + override protected def format: String = "USING parquet" +} diff --git a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/rowfiltering/RowFilteringForJDBCV2Suite.scala b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/rowfiltering/RowFilteringForJDBCV2Suite.scala new file mode 100644 index 00000000000..cfdb7dadc46 --- /dev/null +++ b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/rowfiltering/RowFilteringForJDBCV2Suite.scala @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.plugin.spark.authz.ranger.rowfiltering + +import java.sql.DriverManager + +import scala.util.Try + +import org.apache.spark.SparkConf +import org.scalatest.Outcome + +class RowFilteringForJDBCV2Suite extends RowFilteringTestBase { + override protected val extraSparkConf: SparkConf = { + val conf = new SparkConf() + if (isSparkV31OrGreater) { + conf + .set("spark.sql.defaultCatalog", "testcat") + .set( + "spark.sql.catalog.testcat", + "org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog") + .set(s"spark.sql.catalog.testcat.url", "jdbc:derby:memory:testcat;create=true") + .set( + s"spark.sql.catalog.testcat.driver", + "org.apache.derby.jdbc.AutoloadedDriver") + } + conf + } + + override protected val catalogImpl: String = "in-memory" + + override protected def format: String = "" + + override def beforeAll(): Unit = { + if (isSparkV31OrGreater) super.beforeAll() + } + + override def afterAll(): Unit = { + if (isSparkV31OrGreater) { + super.afterAll() + // cleanup db + Try { + DriverManager.getConnection(s"jdbc:derby:memory:testcat;shutdown=true") + } + } + } + + override def withFixture(test: NoArgTest): Outcome = { + assume(isSparkV31OrGreater) + test() + } +} diff --git a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/rowfiltering/RowFilteringTestBase.scala b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/rowfiltering/RowFilteringTestBase.scala new file mode 100644 index 00000000000..a73690724e4 --- /dev/null +++ b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/rowfiltering/RowFilteringTestBase.scala @@ -0,0 +1,121 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.plugin.spark.authz.ranger.rowfiltering + +// scalastyle:off +import scala.util.Try + +import org.apache.spark.sql.{Row, SparkSessionExtensions} +import org.scalatest.BeforeAndAfterAll +import org.scalatest.funsuite.AnyFunSuite + +import org.apache.kyuubi.plugin.spark.authz.SparkSessionProvider +import org.apache.kyuubi.plugin.spark.authz.ranger.RangerSparkExtension + +/** + * Base trait for row filtering tests, derivative classes shall name themselves following: + * RowFilteringFor CatalogImpl? FileFormat? Additions? Suite + */ +trait RowFilteringTestBase extends AnyFunSuite with SparkSessionProvider with BeforeAndAfterAll { +// scalastyle:on + override protected val extension: SparkSessionExtensions => Unit = new RangerSparkExtension + + private def setup(): Unit = { + sql(s"CREATE TABLE IF NOT EXISTS default.src(key int, value int) $format") + sql("INSERT INTO default.src SELECT 1, 1") + sql("INSERT INTO default.src SELECT 20, 2") + sql("INSERT INTO default.src SELECT 30, 3") + } + + private def cleanup(): Unit = { + sql("DROP TABLE IF EXISTS default.src") + } + + override def beforeAll(): Unit = { + doAs("admin", setup()) + super.beforeAll() + } + override def afterAll(): Unit = { + doAs("admin", cleanup()) + spark.stop + super.afterAll() + } + + test("user without row filtering rule") { + checkAnswer( + "kent", + "SELECT key FROM default.src order order by key", + Seq(Row(1), Row(20), Row(30))) + } + + test("simple query projecting filtering column") { + checkAnswer("bob", "SELECT key FROM default.src", Seq(Row(1))) + } + + test("simple query projecting non filtering column") { + checkAnswer("bob", "SELECT value FROM default.src", Seq(Row(1))) + } + + test("simple query projecting non filtering column with udf max") { + checkAnswer("bob", "SELECT max(value) FROM default.src", Seq(Row(1))) + } + + test("simple query projecting non filtering column with udf coalesce") { + checkAnswer("bob", "SELECT coalesce(max(value), 1) FROM default.src", Seq(Row(1))) + } + + test("in subquery") { + checkAnswer( + "bob", + "SELECT value FROM default.src WHERE value in (SELECT value as key FROM default.src)", + Seq(Row(1))) + } + + test("ctas") { + withCleanTmpResources(Seq(("default.src2", "table"))) { + doAs("bob", sql(s"CREATE TABLE default.src2 $format AS SELECT value FROM default.src")) + val query = "select value from default.src2" + checkAnswer("admin", query, Seq(Row(1))) + checkAnswer("bob", query, Seq(Row(1))) + } + } + + test("[KYUUBI #3581]: row level filter on permanent view") { + assume(isSparkV31OrGreater) + val supported = doAs( + "perm_view_user", + Try(sql("CREATE OR REPLACE VIEW default.perm_view AS SELECT * FROM default.src")).isSuccess) + assume(supported, s"view support for '$format' has not been implemented yet") + + withCleanTmpResources(Seq((s"default.perm_view", "view"))) { + checkAnswer( + "admin", + "SELECT key FROM default.perm_view order order by key", + Seq(Row(1), Row(20), Row(30))) + checkAnswer("bob", "SELECT key FROM default.perm_view", Seq(Row(1))) + checkAnswer("bob", "SELECT value FROM default.perm_view", Seq(Row(1))) + checkAnswer("bob", "SELECT max(value) FROM default.perm_view", Seq(Row(1))) + checkAnswer("bob", "SELECT coalesce(max(value), 1) FROM default.perm_view", Seq(Row(1))) + checkAnswer( + "bob", + "SELECT value FROM default.perm_view WHERE value in " + + "(SELECT value as key FROM default.perm_view)", + Seq(Row(1))) + } + } +} diff --git a/extensions/spark/kyuubi-spark-connector-common/pom.xml b/extensions/spark/kyuubi-spark-connector-common/pom.xml index e9fa8fcb42a..1cba0ccdd4b 100644 --- a/extensions/spark/kyuubi-spark-connector-common/pom.xml +++ b/extensions/spark/kyuubi-spark-connector-common/pom.xml @@ -21,7 +21,7 @@ org.apache.kyuubi kyuubi-parent - 1.7.0-SNAPSHOT + 1.8.0-SNAPSHOT ../../../pom.xml diff --git a/extensions/spark/kyuubi-spark-connector-hive/pom.xml b/extensions/spark/kyuubi-spark-connector-hive/pom.xml index a97dfa053d0..b75db929d50 100644 --- a/extensions/spark/kyuubi-spark-connector-hive/pom.xml +++ b/extensions/spark/kyuubi-spark-connector-hive/pom.xml @@ -21,7 +21,7 @@ org.apache.kyuubi kyuubi-parent - 1.7.0-SNAPSHOT + 1.8.0-SNAPSHOT ../../../pom.xml @@ -32,7 +32,6 @@ https://kyuubi.apache.org/ - org.apache.kyuubi kyuubi-spark-connector-common_${scala.binary.version} @@ -40,34 +39,34 @@ - org.apache.kyuubi - kyuubi-spark-connector-common_${scala.binary.version} - ${project.version} - test-jar - test + com.google.guava + guava - org.scala-lang - scala-library + org.apache.spark + spark-hive_${scala.binary.version} provided - org.slf4j - slf4j-api + org.apache.hadoop + hadoop-client-api provided - org.apache.spark - spark-sql_${scala.binary.version} - provided + org.apache.kyuubi + kyuubi-spark-connector-common_${scala.binary.version} + ${project.version} + test-jar + test - com.google.guava - guava + org.scalatestplus + scalacheck-1-17_${scala.binary.version} + test @@ -84,17 +83,6 @@ test - - org.apache.spark - spark-hive_${scala.binary.version} - - - - org.scalatestplus - scalacheck-1-17_${scala.binary.version} - test - - org.apache.spark spark-sql_${scala.binary.version} @@ -117,15 +105,10 @@ test - - org.apache.hadoop - hadoop-client-api - - org.apache.hadoop hadoop-client-runtime - runtime + test +- Licensed to the Apache Software Foundation (ASF) under one or more +- contributor license agreements. See the NOTICE file distributed with +- this work for additional information regarding copyright ownership. +- The ASF licenses this file to You under the Apache License, Version 2.0 +- (the "License"); you may not use this file except in compliance with +- the License. You may obtain a copy of the License at +- +- http://www.apache.org/licenses/LICENSE-2.0 +- +- Unless required by applicable law or agreed to in writing, software +- distributed under the License is distributed on an "AS IS" BASIS, +- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- See the License for the specific language governing permissions and +- limitations under the License. +--> # Kyuubi Spark Listener Extension ## Functions - [x] All `listener` extensions can be implemented in this module, like `QueryExecutionListener` and `ExtraListener` -- [x] Add `SparkOperationLineageQueryExecutionListener` to extends spark `QueryExecutionListener` +- [x] Add `SparkOperationLineageQueryExecutionListener` to extends spark `QueryExecutionListener` - [x] SQL lineage parsing will be triggered after SQL execution and will be written to the json logger file ## Build @@ -37,3 +37,4 @@ build/mvn clean package -pl :kyuubi-spark-lineage_2.12 -Dspark.version=3.2.1 - [x] 3.3.x (default) - [x] 3.2.x - [x] 3.1.x + diff --git a/extensions/spark/kyuubi-spark-lineage/pom.xml b/extensions/spark/kyuubi-spark-lineage/pom.xml index 74c05299dc9..bc13480d77c 100644 --- a/extensions/spark/kyuubi-spark-lineage/pom.xml +++ b/extensions/spark/kyuubi-spark-lineage/pom.xml @@ -21,7 +21,7 @@ org.apache.kyuubi kyuubi-parent - 1.7.0-SNAPSHOT + 1.8.0-SNAPSHOT ../../../pom.xml @@ -38,15 +38,22 @@ - commons-collections - commons-collections - test + org.apache.kyuubi + kyuubi-common_${scala.binary.version} + ${project.version} + provided org.apache.kyuubi - kyuubi-common_${scala.binary.version} + kyuubi-events_${scala.binary.version} ${project.version} + provided + + + + commons-collections + commons-collections test diff --git a/extensions/spark/kyuubi-spark-lineage/src/main/scala/org/apache/kyuubi/plugin/lineage/events/OperationLineageEvent.scala b/extensions/spark/kyuubi-spark-lineage/src/main/scala/org/apache/kyuubi/plugin/lineage/Lineage.scala similarity index 88% rename from extensions/spark/kyuubi-spark-lineage/src/main/scala/org/apache/kyuubi/plugin/lineage/events/OperationLineageEvent.scala rename to extensions/spark/kyuubi-spark-lineage/src/main/scala/org/apache/kyuubi/plugin/lineage/Lineage.scala index c69b45709b3..4bd0bd0b168 100644 --- a/extensions/spark/kyuubi-spark-lineage/src/main/scala/org/apache/kyuubi/plugin/lineage/events/OperationLineageEvent.scala +++ b/extensions/spark/kyuubi-spark-lineage/src/main/scala/org/apache/kyuubi/plugin/lineage/Lineage.scala @@ -15,9 +15,7 @@ * limitations under the License. */ -package org.apache.kyuubi.plugin.lineage.events - -import org.apache.spark.scheduler.SparkListenerEvent +package org.apache.kyuubi.plugin.lineage case class ColumnLineage(column: String, originalColumns: Set[String]) @@ -60,9 +58,3 @@ object Lineage { new Lineage(inputTables, outputTables, newColumnLineage) } } - -case class OperationLineageEvent( - executionId: Long, - eventTime: Long, - lineage: Option[Lineage], - exception: Option[Throwable]) extends SparkListenerEvent diff --git a/extensions/spark/kyuubi-spark-lineage/src/main/scala/org/apache/kyuubi/plugin/lineage/LineageDispatcher.scala b/extensions/spark/kyuubi-spark-lineage/src/main/scala/org/apache/kyuubi/plugin/lineage/LineageDispatcher.scala new file mode 100644 index 00000000000..8f5dc0d9e61 --- /dev/null +++ b/extensions/spark/kyuubi-spark-lineage/src/main/scala/org/apache/kyuubi/plugin/lineage/LineageDispatcher.scala @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.plugin.lineage + +import org.apache.spark.sql.execution.QueryExecution + +import org.apache.kyuubi.plugin.lineage.dispatcher.{KyuubiEventDispatcher, SparkEventDispatcher} + +trait LineageDispatcher { + + def send(qe: QueryExecution, lineage: Option[Lineage]): Unit + + def onFailure(qe: QueryExecution, exception: Exception): Unit = {} + +} + +object LineageDispatcher { + + def apply(dispatcherType: String): LineageDispatcher = { + LineageDispatcherType.withName(dispatcherType) match { + case LineageDispatcherType.SPARK_EVENT => new SparkEventDispatcher() + case LineageDispatcherType.KYUUBI_EVENT => new KyuubiEventDispatcher() + case _ => throw new UnsupportedOperationException( + s"Unsupported lineage dispatcher: $dispatcherType.") + } + } + +} diff --git a/extensions/spark/kyuubi-spark-lineage/src/main/scala/org/apache/kyuubi/plugin/lineage/LineageDispatcherType.scala b/extensions/spark/kyuubi-spark-lineage/src/main/scala/org/apache/kyuubi/plugin/lineage/LineageDispatcherType.scala new file mode 100644 index 00000000000..d6afea15233 --- /dev/null +++ b/extensions/spark/kyuubi-spark-lineage/src/main/scala/org/apache/kyuubi/plugin/lineage/LineageDispatcherType.scala @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.plugin.lineage + +object LineageDispatcherType extends Enumeration { + type LineageDispatcherType = Value + + val SPARK_EVENT, KYUUBI_EVENT = Value +} diff --git a/extensions/spark/kyuubi-spark-lineage/src/main/scala/org/apache/kyuubi/plugin/lineage/SparkOperationLineageQueryExecutionListener.scala b/extensions/spark/kyuubi-spark-lineage/src/main/scala/org/apache/kyuubi/plugin/lineage/SparkOperationLineageQueryExecutionListener.scala index c27d2eb8b4a..b83117cde29 100644 --- a/extensions/spark/kyuubi-spark-lineage/src/main/scala/org/apache/kyuubi/plugin/lineage/SparkOperationLineageQueryExecutionListener.scala +++ b/extensions/spark/kyuubi-spark-lineage/src/main/scala/org/apache/kyuubi/plugin/lineage/SparkOperationLineageQueryExecutionListener.scala @@ -17,24 +17,25 @@ package org.apache.kyuubi.plugin.lineage -import org.apache.spark.kyuubi.lineage.SparkContextHelper +import org.apache.spark.kyuubi.lineage.{LineageConf, SparkContextHelper} import org.apache.spark.sql.execution.QueryExecution import org.apache.spark.sql.util.QueryExecutionListener -import org.apache.kyuubi.plugin.lineage.events.OperationLineageEvent import org.apache.kyuubi.plugin.lineage.helper.SparkSQLLineageParseHelper class SparkOperationLineageQueryExecutionListener extends QueryExecutionListener { + private lazy val dispatchers: Seq[LineageDispatcher] = { + SparkContextHelper.getConf(LineageConf.DISPATCHERS).map(LineageDispatcher(_)) + } + override def onSuccess(funcName: String, qe: QueryExecution, durationNs: Long): Unit = { val lineage = - SparkSQLLineageParseHelper(qe.sparkSession).transformToLineage(qe.id, qe.optimizedPlan) - val event = OperationLineageEvent(qe.id, System.currentTimeMillis(), lineage, None) - SparkContextHelper.postEventToSparkListenerBus(event) + SparkSQLLineageParseHelper(qe.sparkSession).transformToLineage(qe.id, qe.analyzed) + dispatchers.foreach(_.send(qe, lineage)) } override def onFailure(funcName: String, qe: QueryExecution, exception: Exception): Unit = { - val event = OperationLineageEvent(qe.id, System.currentTimeMillis(), None, Some(exception)) - SparkContextHelper.postEventToSparkListenerBus(event) + dispatchers.foreach(_.onFailure(qe, exception)) } } diff --git a/extensions/spark/kyuubi-spark-lineage/src/main/scala/org/apache/kyuubi/plugin/lineage/dispatcher/KyuubiEventDispatcher.scala b/extensions/spark/kyuubi-spark-lineage/src/main/scala/org/apache/kyuubi/plugin/lineage/dispatcher/KyuubiEventDispatcher.scala new file mode 100644 index 00000000000..6a9e65948a6 --- /dev/null +++ b/extensions/spark/kyuubi-spark-lineage/src/main/scala/org/apache/kyuubi/plugin/lineage/dispatcher/KyuubiEventDispatcher.scala @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.plugin.lineage.dispatcher + +import org.apache.spark.sql.execution.QueryExecution + +import org.apache.kyuubi.Utils +import org.apache.kyuubi.events.{EventBus, KyuubiEvent} +import org.apache.kyuubi.plugin.lineage.{Lineage, LineageDispatcher} + +class KyuubiEventDispatcher extends LineageDispatcher { + + override def send(qe: QueryExecution, lineage: Option[Lineage]): Unit = { + val event = OperationLineageKyuubiEvent(qe.id, System.currentTimeMillis(), lineage, None) + EventBus.post(event) + } + + override def onFailure(qe: QueryExecution, exception: Exception): Unit = { + val event = + OperationLineageKyuubiEvent(qe.id, System.currentTimeMillis(), None, Some(exception)) + EventBus.post(event) + } + +} + +case class OperationLineageKyuubiEvent( + executionId: Long, + eventTime: Long, + lineage: Option[Lineage], + exception: Option[Throwable]) extends KyuubiEvent { + override def partitions: Seq[(String, String)] = + ("day", Utils.getDateFromTimestamp(eventTime)) :: Nil +} diff --git a/extensions/spark/kyuubi-spark-lineage/src/main/scala/org/apache/kyuubi/plugin/lineage/dispatcher/SparkEventDispatcher.scala b/extensions/spark/kyuubi-spark-lineage/src/main/scala/org/apache/kyuubi/plugin/lineage/dispatcher/SparkEventDispatcher.scala new file mode 100644 index 00000000000..36fbbb7d4a0 --- /dev/null +++ b/extensions/spark/kyuubi-spark-lineage/src/main/scala/org/apache/kyuubi/plugin/lineage/dispatcher/SparkEventDispatcher.scala @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.plugin.lineage.dispatcher + +import org.apache.spark.kyuubi.lineage.SparkContextHelper +import org.apache.spark.scheduler.SparkListenerEvent +import org.apache.spark.sql.execution.QueryExecution + +import org.apache.kyuubi.plugin.lineage.{Lineage, LineageDispatcher} + +class SparkEventDispatcher extends LineageDispatcher { + + override def send(qe: QueryExecution, lineage: Option[Lineage]): Unit = { + val event = OperationLineageSparkEvent(qe.id, System.currentTimeMillis(), lineage, None) + SparkContextHelper.postEventToSparkListenerBus(event) + } + + override def onFailure(qe: QueryExecution, exception: Exception): Unit = { + val event = OperationLineageSparkEvent(qe.id, System.currentTimeMillis(), None, Some(exception)) + SparkContextHelper.postEventToSparkListenerBus(event) + } +} + +case class OperationLineageSparkEvent( + executionId: Long, + eventTime: Long, + lineage: Option[Lineage], + exception: Option[Throwable]) extends SparkListenerEvent diff --git a/extensions/spark/kyuubi-spark-lineage/src/main/scala/org/apache/kyuubi/plugin/lineage/helper/SparkSQLLineageParseHelper.scala b/extensions/spark/kyuubi-spark-lineage/src/main/scala/org/apache/kyuubi/plugin/lineage/helper/SparkSQLLineageParseHelper.scala index f70e09126cb..f060cc99422 100644 --- a/extensions/spark/kyuubi-spark-lineage/src/main/scala/org/apache/kyuubi/plugin/lineage/helper/SparkSQLLineageParseHelper.scala +++ b/extensions/spark/kyuubi-spark-lineage/src/main/scala/org/apache/kyuubi/plugin/lineage/helper/SparkSQLLineageParseHelper.scala @@ -21,12 +21,12 @@ import scala.collection.immutable.ListMap import scala.util.{Failure, Success, Try} import org.apache.spark.internal.Logging +import org.apache.spark.kyuubi.lineage.{LineageConf, SparkContextHelper} import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis.{NamedRelation, PersistedView, ViewType} import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, HiveTableRelation} -import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeSet, Expression, NamedExpression} -import org.apache.spark.sql.catalyst.expressions.ScalarSubquery +import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeSet, Expression, NamedExpression, ScalarSubquery} import org.apache.spark.sql.catalyst.expressions.aggregate.Count import org.apache.spark.sql.catalyst.plans.{LeftAnti, LeftSemi} import org.apache.spark.sql.catalyst.plans.logical._ @@ -36,7 +36,7 @@ import org.apache.spark.sql.execution.columnar.InMemoryRelation import org.apache.spark.sql.execution.datasources.LogicalRelation import org.apache.spark.sql.execution.datasources.v2.{DataSourceV2Relation, DataSourceV2ScanRelation} -import org.apache.kyuubi.plugin.lineage.events.Lineage +import org.apache.kyuubi.plugin.lineage.Lineage import org.apache.kyuubi.plugin.lineage.helper.SparkListenerHelper.isSparkVersionAtMost trait LineageParser { @@ -128,7 +128,7 @@ trait LineageParser { exp.toAttribute, if (!containsCountAll(exp.child)) references else references + exp.toAttribute.withName(AGGREGATE_COUNT_COLUMN_IDENTIFIER)) - case a: Attribute => a -> a.references + case a: Attribute => a -> AttributeSet(a) } ListMap(exps: _*) } @@ -149,6 +149,9 @@ trait LineageParser { attr.withQualifier(attr.qualifier.init) case attr if attr.name.equalsIgnoreCase(AGGREGATE_COUNT_COLUMN_IDENTIFIER) => attr.withQualifier(qualifier) + case attr if isNameWithQualifier(attr, qualifier) => + val newName = attr.name.split('.').last.stripPrefix("`").stripSuffix("`") + attr.withName(newName).withQualifier(qualifier) }) } } else { @@ -160,6 +163,12 @@ trait LineageParser { } } + private def isNameWithQualifier(attr: Attribute, qualifier: Seq[String]): Boolean = { + val nameTokens = attr.name.split('.') + val namespace = nameTokens.init.mkString(".") + nameTokens.length > 1 && namespace.endsWith(qualifier.mkString(".")) + } + private def mergeRelationColumnLineage( parentColumnsLineage: AttributeMap[AttributeSet], relationOutput: Seq[Attribute], @@ -303,7 +312,7 @@ trait LineageParser { val nextColumnsLlineage = ListMap(allAssignments.map { assignment => ( assignment.key.asInstanceOf[Attribute], - AttributeSet(assignment.value.asInstanceOf[Attribute])) + assignment.value.references) }: _*) val targetTable = getPlanField[LogicalPlan]("targetTable", plan) val sourceTable = getPlanField[LogicalPlan]("sourceTable", plan) @@ -316,6 +325,10 @@ trait LineageParser { } ListMap(targetColumnsWithTargetTable.zip(sourceColumnsLineage.values).toSeq: _*) + case p if p.nodeName == "WithCTE" => + val optimized = sparkSession.sessionState.optimizer.execute(p) + extractColumnsLineage(optimized, parentColumnsLineage) + // For query case p: Project => val nextColumnsLineage = @@ -327,6 +340,45 @@ trait LineageParser { joinColumnsLineage(parentColumnsLineage, getSelectColumnLineage(p.aggregateExpressions)) p.children.map(extractColumnsLineage(_, nextColumnsLineage)).reduce(mergeColumnsLineage) + case p: Expand => + val references = + p.projections.transpose.map(_.flatMap(x => x.references)).map(AttributeSet(_)) + + val childColumnsLineage = ListMap(p.output.zip(references): _*) + val nextColumnsLineage = + joinColumnsLineage(parentColumnsLineage, childColumnsLineage) + p.children.map(extractColumnsLineage(_, nextColumnsLineage)).reduce(mergeColumnsLineage) + + case p: Generate => + val generateColumnsLineageWithId = + ListMap(p.generatorOutput.map(attrRef => (attrRef.toAttribute.exprId, p.references)): _*) + + val nextColumnsLineage = parentColumnsLineage.map { + case (key, attrRefs) => + key -> AttributeSet(attrRefs.flatMap(attr => + generateColumnsLineageWithId.getOrElse( + attr.exprId, + AttributeSet(attr)))) + } + p.children.map(extractColumnsLineage(_, nextColumnsLineage)).reduce(mergeColumnsLineage) + + case p: Window => + val windowColumnsLineage = + ListMap(p.windowExpressions.map(exp => (exp.toAttribute, exp.references)): _*) + + val nextColumnsLineage = if (parentColumnsLineage.isEmpty) { + ListMap(p.child.output.map(attr => (attr, attr.references)): _*) ++ windowColumnsLineage + } else { + parentColumnsLineage.map { + case (k, _) if windowColumnsLineage.contains(k) => + k -> windowColumnsLineage(k) + case (k, attrs) => + k -> AttributeSet(attrs.flatten(attr => + windowColumnsLineage.getOrElse(attr, AttributeSet(attr)))) + } + } + p.children.map(extractColumnsLineage(_, nextColumnsLineage)).reduce(mergeColumnsLineage) + case p: Join => p.joinType match { case LeftSemi | LeftAnti => @@ -337,14 +389,22 @@ trait LineageParser { } case p: Union => - // merge all children in to one derivedColumns - val childrenUnion = - p.children.map(extractColumnsLineage(_, ListMap[Attribute, AttributeSet]())).map( - _.values).reduce { - (left, right) => - left.zip(right).map(attr => attr._1 ++ attr._2) + val childrenColumnsLineage = + // support for the multi-insert statement + if (p.output.isEmpty) { + p.children + .map(extractColumnsLineage(_, ListMap[Attribute, AttributeSet]())) + .reduce(mergeColumnsLineage) + } else { + // merge all children in to one derivedColumns + val childrenUnion = + p.children.map(extractColumnsLineage(_, ListMap[Attribute, AttributeSet]())).map( + _.values).reduce { + (left, right) => + left.zip(right).map(attr => attr._1 ++ attr._2) + } + ListMap(p.output.zip(childrenUnion): _*) } - val childrenColumnsLineage = ListMap(p.output.zip(childrenUnion): _*) joinColumnsLineage(parentColumnsLineage, childrenColumnsLineage) case p: LogicalRelation if p.catalogTable.nonEmpty => @@ -369,6 +429,29 @@ trait LineageParser { case p: LocalRelation => joinRelationColumnLineage(parentColumnsLineage, p.output, Seq(LOCAL_TABLE_IDENTIFIER)) + case _: OneRowRelation => + parentColumnsLineage.map { + case (k, attrs) => + k -> AttributeSet(attrs.map { + case attr + if attr.qualifier.nonEmpty && attr.qualifier.last.equalsIgnoreCase( + SUBQUERY_COLUMN_IDENTIFIER) => + attr.withQualifier(attr.qualifier.init) + case attr => attr + }) + } + + case p: View => + if (!p.isTempView && SparkContextHelper.getConf( + LineageConf.SKIP_PARSING_PERMANENT_VIEW_ENABLED)) { + val viewName = p.desc.qualifiedName + joinRelationColumnLineage(parentColumnsLineage, p.output, Seq(viewName)) + } else { + val viewColumnsLineage = + extractColumnsLineage(p.child, ListMap[Attribute, AttributeSet]()) + mergeRelationColumnLineage(parentColumnsLineage, p.output, viewColumnsLineage) + } + case p: InMemoryRelation => // get logical plan from cachedPlan val cachedTableLogical = findSparkPlanLogicalLink(Seq(p.cacheBuilder.cachedPlan)) diff --git a/extensions/spark/kyuubi-spark-lineage/src/main/scala/org/apache/spark/kyuubi/lineage/LineageConf.scala b/extensions/spark/kyuubi-spark-lineage/src/main/scala/org/apache/spark/kyuubi/lineage/LineageConf.scala new file mode 100644 index 00000000000..6fb5399c059 --- /dev/null +++ b/extensions/spark/kyuubi-spark-lineage/src/main/scala/org/apache/spark/kyuubi/lineage/LineageConf.scala @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.kyuubi.lineage + +import org.apache.spark.internal.config.ConfigBuilder + +import org.apache.kyuubi.plugin.lineage.LineageDispatcherType + +object LineageConf { + + val SKIP_PARSING_PERMANENT_VIEW_ENABLED = + ConfigBuilder("spark.kyuubi.plugin.lineage.skip.parsing.permanent.view.enabled") + .doc("Whether to skip the lineage parsing of permanent views") + .version("1.8.0") + .booleanConf + .createWithDefault(false) + + val DISPATCHERS = ConfigBuilder("spark.kyuubi.plugin.lineage.dispatchers") + .doc("The lineage dispatchers are implementations of " + + "`org.apache.kyuubi.plugin.lineage.LineageDispatcher` for dispatching lineage events.
        " + + "
      • SPARK_EVENT: send lineage event to spark event bus
      • " + + "
      • KYUUBI_EVENT: send lineage event to kyuubi event bus
      • " + + "
      ") + .version("1.8.0") + .stringConf + .toSequence + .checkValue( + _.toSet.subsetOf(LineageDispatcherType.values.map(_.toString)), + "Unsupported lineage dispatchers") + .createWithDefault(Seq(LineageDispatcherType.SPARK_EVENT.toString)) + +} diff --git a/extensions/spark/kyuubi-spark-lineage/src/main/scala/org/apache/spark/kyuubi/lineage/SparkContextHelper.scala b/extensions/spark/kyuubi-spark-lineage/src/main/scala/org/apache/spark/kyuubi/lineage/SparkContextHelper.scala index e6272364f80..6e0f0e5c846 100644 --- a/extensions/spark/kyuubi-spark-lineage/src/main/scala/org/apache/spark/kyuubi/lineage/SparkContextHelper.scala +++ b/extensions/spark/kyuubi-spark-lineage/src/main/scala/org/apache/spark/kyuubi/lineage/SparkContextHelper.scala @@ -18,6 +18,7 @@ package org.apache.spark.kyuubi.lineage import org.apache.spark.SparkContext +import org.apache.spark.internal.config.ConfigEntry import org.apache.spark.scheduler.SparkListenerEvent import org.apache.spark.sql.SparkSession @@ -31,4 +32,11 @@ object SparkContextHelper { sc.listenerBus.post(event) } + def getConf[T](entry: ConfigEntry[T]): T = { + globalSparkContext.getConf.get(entry) + } + + def setConf[T](entry: ConfigEntry[T], value: T): Unit = { + globalSparkContext.conf.set(entry, value) + } } diff --git a/extensions/spark/kyuubi-spark-lineage/src/test/scala/org/apache/kyuubi/plugin/lineage/events/OperationLineageEventSuite.scala b/extensions/spark/kyuubi-spark-lineage/src/test/scala/org/apache/kyuubi/plugin/lineage/events/OperationLineageEventSuite.scala index 6eeebbd3c50..67e94ad0b79 100644 --- a/extensions/spark/kyuubi-spark-lineage/src/test/scala/org/apache/kyuubi/plugin/lineage/events/OperationLineageEventSuite.scala +++ b/extensions/spark/kyuubi-spark-lineage/src/test/scala/org/apache/kyuubi/plugin/lineage/events/OperationLineageEventSuite.scala @@ -19,11 +19,17 @@ package org.apache.kyuubi.plugin.lineage.events import java.util.concurrent.{CountDownLatch, TimeUnit} +import scala.collection.immutable.List + import org.apache.spark.SparkConf +import org.apache.spark.kyuubi.lineage.LineageConf._ import org.apache.spark.scheduler.{SparkListener, SparkListenerEvent} import org.apache.spark.sql.SparkListenerExtensionTest import org.apache.kyuubi.KyuubiFunSuite +import org.apache.kyuubi.events.EventBus +import org.apache.kyuubi.plugin.lineage.Lineage +import org.apache.kyuubi.plugin.lineage.dispatcher.{OperationLineageKyuubiEvent, OperationLineageSparkEvent} import org.apache.kyuubi.plugin.lineage.helper.SparkListenerHelper.isSparkVersionAtMost class OperationLineageEventSuite extends KyuubiFunSuite with SparkListenerExtensionTest { @@ -40,18 +46,21 @@ class OperationLineageEventSuite extends KyuubiFunSuite with SparkListenerExtens .set( "spark.sql.queryExecutionListeners", "org.apache.kyuubi.plugin.lineage.SparkOperationLineageQueryExecutionListener") + .set(DISPATCHERS.key, "SPARK_EVENT,KYUUBI_EVENT") + .set(SKIP_PARSING_PERMANENT_VIEW_ENABLED.key, "true") } test("operation lineage event capture: for execute sql") { - val countDownLatch = new CountDownLatch(1) - var actual: Lineage = null + val countDownLatch = new CountDownLatch(2) + // get lineage from spark event + var actualSparkEventLineage: Lineage = null spark.sparkContext.addSparkListener(new SparkListener { override def onOtherEvent(event: SparkListenerEvent): Unit = { event match { - case lineageEvent: OperationLineageEvent => + case lineageEvent: OperationLineageSparkEvent => lineageEvent.lineage.foreach { case lineage if lineage.inputTables.nonEmpty => - actual = lineage + actualSparkEventLineage = lineage countDownLatch.countDown() } case _ => @@ -59,6 +68,16 @@ class OperationLineageEventSuite extends KyuubiFunSuite with SparkListenerExtens } }) + // get lineage from kyuubi event + var actualKyuubiEventLineage: Lineage = null + EventBus.register[OperationLineageKyuubiEvent] { lineageEvent: OperationLineageKyuubiEvent => + lineageEvent.lineage.foreach { + case lineage if lineage.inputTables.nonEmpty => + actualKyuubiEventLineage = lineage + countDownLatch.countDown() + } + } + withTable("test_table0") { _ => spark.sql("create table test_table0(a string, b string)") spark.sql("select a as col0, b as col1 from test_table0").collect() @@ -69,7 +88,8 @@ class OperationLineageEventSuite extends KyuubiFunSuite with SparkListenerExtens ("col0", Set("default.test_table0.a")), ("col1", Set("default.test_table0.b")))) countDownLatch.await(20, TimeUnit.SECONDS) - assert(actual == expected) + assert(actualSparkEventLineage == expected) + assert(actualKyuubiEventLineage == expected) } } @@ -86,7 +106,8 @@ class OperationLineageEventSuite extends KyuubiFunSuite with SparkListenerExtens spark.sparkContext.addSparkListener(new SparkListener { override def onOtherEvent(event: SparkListenerEvent): Unit = { event match { - case lineageEvent: OperationLineageEvent if executionId == lineageEvent.executionId => + case lineageEvent: OperationLineageSparkEvent + if executionId == lineageEvent.executionId => lineageEvent.lineage.foreach { lineage => assert(lineage == expected) countDownLatch.countDown() @@ -116,4 +137,40 @@ class OperationLineageEventSuite extends KyuubiFunSuite with SparkListenerExtens } } + test("test for skip parsing permanent view") { + val countDownLatch = new CountDownLatch(1) + var actual: Lineage = null + spark.sparkContext.addSparkListener(new SparkListener { + override def onOtherEvent(event: SparkListenerEvent): Unit = { + event match { + case lineageEvent: OperationLineageSparkEvent => + lineageEvent.lineage.foreach { + case lineage if lineage.inputTables.nonEmpty && lineage.outputTables.isEmpty => + actual = lineage + countDownLatch.countDown() + } + case _ => + } + } + }) + + withTable("t1") { _ => + spark.sql("CREATE TABLE t1 (a string, b string, c string) USING hive") + spark.sql("CREATE VIEW t2 as select * from t1") + spark.sql( + s"select a as k, b" + + s" from t2" + + s" where a in ('HELLO') and c = 'HELLO'").collect() + + val expected = Lineage( + List("default.t2"), + List(), + List( + ("k", Set("default.t2.a")), + ("b", Set("default.t2.b")))) + countDownLatch.await(20, TimeUnit.SECONDS) + assert(actual == expected) + } + } + } diff --git a/extensions/spark/kyuubi-spark-lineage/src/test/scala/org/apache/kyuubi/plugin/lineage/helper/SparkSQLLineageParserHelperSuite.scala b/extensions/spark/kyuubi-spark-lineage/src/test/scala/org/apache/kyuubi/plugin/lineage/helper/SparkSQLLineageParserHelperSuite.scala index 6652be9ea15..96003f051f5 100644 --- a/extensions/spark/kyuubi-spark-lineage/src/test/scala/org/apache/kyuubi/plugin/lineage/helper/SparkSQLLineageParserHelperSuite.scala +++ b/extensions/spark/kyuubi-spark-lineage/src/test/scala/org/apache/kyuubi/plugin/lineage/helper/SparkSQLLineageParserHelperSuite.scala @@ -21,6 +21,7 @@ import scala.collection.immutable.List import scala.reflect.io.File import org.apache.spark.SparkConf +import org.apache.spark.kyuubi.lineage.{LineageConf, SparkContextHelper} import org.apache.spark.sql.{DataFrame, SparkListenerExtensionTest, SparkSession, SQLContext} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType} @@ -28,7 +29,7 @@ import org.apache.spark.sql.sources.{BaseRelation, InsertableRelation, SchemaRel import org.apache.spark.sql.types.{IntegerType, StringType, StructType} import org.apache.kyuubi.KyuubiFunSuite -import org.apache.kyuubi.plugin.lineage.events.Lineage +import org.apache.kyuubi.plugin.lineage.Lineage import org.apache.kyuubi.plugin.lineage.helper.SparkListenerHelper.isSparkVersionAtMost class SparkSQLLineageParserHelperSuite extends KyuubiFunSuite @@ -171,7 +172,7 @@ class SparkSQLLineageParserHelperSuite extends KyuubiFunSuite "WHEN MATCHED THEN " + " UPDATE SET target.name = source.name, target.price = source.price " + "WHEN NOT MATCHED THEN " + - " INSERT (id, name, price) VALUES (source.id, source.name, source.price)") + " INSERT (id, name, price) VALUES (cast(source.id as int), source.name, source.price)") assert(ret0 == Lineage( List("v2_catalog.db.source_t"), List("v2_catalog.db.target_t"), @@ -932,8 +933,8 @@ class SparkSQLLineageParserHelperSuite extends KyuubiFunSuite df0.cache() val df1 = spark.sql("select a, b from table1") val df = df0.join(df1).select(df0("a0").alias("aa"), df1("b").alias("bb")) - val optimized = df.queryExecution.optimizedPlan - val ret1 = SparkSQLLineageParseHelper(spark).transformToLineage(0, optimized).get + val analyzed = df.queryExecution.analyzed + val ret1 = SparkSQLLineageParseHelper(spark).transformToLineage(0, analyzed).get assert(ret1 == Lineage( List("default.table0", "default.table1"), List(), @@ -1091,6 +1092,259 @@ class SparkSQLLineageParserHelperSuite extends KyuubiFunSuite List( ("aa", Set("default.table1.a", "default.table0.a")), ("bb", Set("default.table1.b"))))) + + val sql11 = + """ + |select tmp.a, b from (select * from table1) tmp; + |""".stripMargin + + val ret11 = exectractLineage(sql11) + assert(ret11 == Lineage( + List("default.table1"), + List(), + List( + ("a", Set("default.table1.a")), + ("b", Set("default.table1.b"))))) + } + } + + test("test group by") { + withTable("t1", "t2", "v2_catalog.db.t1", "v2_catalog.db.t2") { _ => + spark.sql("CREATE TABLE t1 (a string, b string, c string) USING hive") + spark.sql("CREATE TABLE t2 (a string, b string, c string) USING hive") + spark.sql("CREATE TABLE v2_catalog.db.t1 (a string, b string, c string)") + spark.sql("CREATE TABLE v2_catalog.db.t2 (a string, b string, c string)") + val ret0 = + exectractLineage( + s"insert into table t1 select a," + + s"concat_ws('/', collect_set(b))," + + s"count(distinct(b)) * count(distinct(c))" + + s"from t2 group by a") + assert(ret0 == Lineage( + List("default.t2"), + List("default.t1"), + List( + ("default.t1.a", Set("default.t2.a")), + ("default.t1.b", Set("default.t2.b")), + ("default.t1.c", Set("default.t2.b", "default.t2.c"))))) + + val ret1 = + exectractLineage( + s"insert into table v2_catalog.db.t1 select a," + + s"concat_ws('/', collect_set(b))," + + s"count(distinct(b)) * count(distinct(c))" + + s"from v2_catalog.db.t2 group by a") + assert(ret1 == Lineage( + List("v2_catalog.db.t2"), + List("v2_catalog.db.t1"), + List( + ("v2_catalog.db.t1.a", Set("v2_catalog.db.t2.a")), + ("v2_catalog.db.t1.b", Set("v2_catalog.db.t2.b")), + ("v2_catalog.db.t1.c", Set("v2_catalog.db.t2.b", "v2_catalog.db.t2.c"))))) + + val ret2 = + exectractLineage( + s"insert into table v2_catalog.db.t1 select a," + + s"count(distinct(b+c))," + + s"count(distinct(b)) * count(distinct(c))" + + s"from v2_catalog.db.t2 group by a") + assert(ret2 == Lineage( + List("v2_catalog.db.t2"), + List("v2_catalog.db.t1"), + List( + ("v2_catalog.db.t1.a", Set("v2_catalog.db.t2.a")), + ("v2_catalog.db.t1.b", Set("v2_catalog.db.t2.b", "v2_catalog.db.t2.c")), + ("v2_catalog.db.t1.c", Set("v2_catalog.db.t2.b", "v2_catalog.db.t2.c"))))) + } + } + + test("test grouping sets") { + withTable("t1", "t2") { _ => + spark.sql("CREATE TABLE t1 (a string, b string, c string) USING hive") + spark.sql("CREATE TABLE t2 (a string, b string, c string, d string) USING hive") + val ret0 = + exectractLineage( + s"insert into table t1 select a,b,GROUPING__ID " + + s"from t2 group by a,b,c,d grouping sets ((a,b,c), (a,b,d))") + assert(ret0 == Lineage( + List("default.t2"), + List("default.t1"), + List( + ("default.t1.a", Set("default.t2.a")), + ("default.t1.b", Set("default.t2.b")), + ("default.t1.c", Set())))) + } + } + + test("test cache table with window function") { + withTable("t1", "t2") { _ => + spark.sql("CREATE TABLE t1 (a string, b string) USING hive") + spark.sql("CREATE TABLE t2 (a string, b string) USING hive") + + spark.sql( + s"cache table c1 select * from (" + + s"select a, b, row_number() over (partition by a order by b asc ) rank from t2)" + + s" where rank=1") + val ret0 = exectractLineage("insert overwrite table t1 select a, b from c1") + assert(ret0 == Lineage( + List("default.t2"), + List("default.t1"), + List( + ("default.t1.a", Set("default.t2.a")), + ("default.t1.b", Set("default.t2.b"))))) + + val ret1 = exectractLineage("insert overwrite table t1 select a, rank from c1") + assert(ret1 == Lineage( + List("default.t2"), + List("default.t1"), + List( + ("default.t1.a", Set("default.t2.a")), + ("default.t1.b", Set("default.t2.a", "default.t2.b"))))) + + spark.sql( + s"cache table c2 select * from (" + + s"select b, a, row_number() over (partition by a order by b asc ) rank from t2)" + + s" where rank=1") + val ret2 = exectractLineage("insert overwrite table t1 select a, b from c2") + assert(ret2 == Lineage( + List("default.t2"), + List("default.t1"), + List( + ("default.t1.a", Set("default.t2.a")), + ("default.t1.b", Set("default.t2.b"))))) + + spark.sql( + s"cache table c3 select * from (" + + s"select a as aa, b as bb, row_number() over (partition by a order by b asc ) rank" + + s" from t2) where rank=1") + val ret3 = exectractLineage("insert overwrite table t1 select aa, bb from c3") + assert(ret3 == Lineage( + List("default.t2"), + List("default.t1"), + List( + ("default.t1.a", Set("default.t2.a")), + ("default.t1.b", Set("default.t2.b"))))) + } + } + + test("test count()") { + withTable("t1", "t2") { _ => + spark.sql("CREATE TABLE t1 (a string, b string, c string) USING hive") + spark.sql("CREATE TABLE t2 (a string, b string, c string) USING hive") + val ret0 = exectractLineage("insert into t1 select 1,2,(select count(distinct" + + " ifnull(get_json_object(a, '$.b.imei'), get_json_object(a, '$.b.android_id'))) from t2)") + + assert(ret0 == Lineage( + List("default.t2"), + List("default.t1"), + List( + ("default.t1.a", Set()), + ("default.t1.b", Set()), + ("default.t1.c", Set("default.t2.a"))))) + } + } + + test("test create view from view") { + withTable("t1") { _ => + spark.sql("CREATE TABLE t1 (a string, b string, c string) USING hive") + withView("t2") { _ => + spark.sql("CREATE VIEW t2 as select * from t1") + val ret0 = + exectractLineage( + s"create or replace view view_tst comment 'view'" + + s" as select a as k,b" + + s" from t2" + + s" where a in ('HELLO') and c = 'HELLO'") + assert(ret0 == Lineage( + List("default.t1"), + List("default.view_tst"), + List( + ("default.view_tst.k", Set("default.t1.a")), + ("default.view_tst.b", Set("default.t1.b"))))) + } + } + } + + test("test for skip parsing permanent view") { + withTable("t1") { _ => + SparkContextHelper.setConf(LineageConf.SKIP_PARSING_PERMANENT_VIEW_ENABLED, true) + spark.sql("CREATE TABLE t1 (a string, b string, c string) USING hive") + withView("t2") { _ => + spark.sql("CREATE VIEW t2 as select * from t1") + val ret0 = + exectractLineage( + s"select a as k, b" + + s" from t2" + + s" where a in ('HELLO') and c = 'HELLO'") + assert(ret0 == Lineage( + List("default.t2"), + List(), + List( + ("k", Set("default.t2.a")), + ("b", Set("default.t2.b"))))) + } + } + } + + test("test the statement with FROM xxx INSERT xxx") { + withTable("t1", "t2", "t3") { _ => + spark.sql("CREATE TABLE t1 (a string, b string) USING hive") + spark.sql("CREATE TABLE t2 (a string, b string) USING hive") + spark.sql("CREATE TABLE t3 (a string, b string) USING hive") + val ret0 = exectractLineage("from (select a,b from t1)" + + " insert overwrite table t2 select a,b where a=1" + + " insert overwrite table t3 select a,b where b=1") + assert(ret0 == Lineage( + List("default.t1"), + List("default.t2", "default.t3"), + List( + ("default.t2.a", Set("default.t1.a")), + ("default.t2.b", Set("default.t1.b")), + ("default.t3.a", Set("default.t1.a")), + ("default.t3.b", Set("default.t1.b"))))) + } + } + + test("test lateral view explode") { + withTable("t1", "t2") { _ => + spark.sql("CREATE TABLE t1 (a string, b string, c string, d string) USING hive") + spark.sql("CREATE TABLE t2 (a string, b string, c string, d string) USING hive") + + val ret0 = exectractLineage("insert into t1 select 1, t2.b, cc.action, t2.d " + + "from t2 lateral view explode(split(c,'\\},\\{')) cc as action") + assert(ret0 == Lineage( + List("default.t2"), + List("default.t1"), + List( + ("default.t1.a", Set()), + ("default.t1.b", Set("default.t2.b")), + ("default.t1.c", Set("default.t2.c")), + ("default.t1.d", Set("default.t2.d"))))) + + val ret1 = exectractLineage("insert into t1 select 1, t2.b, cc.action0, dd.action1 " + + "from t2 " + + "lateral view explode(split(c,'\\},\\{')) cc as action0 " + + "lateral view explode(split(d,'\\},\\{')) dd as action1") + assert(ret1 == Lineage( + List("default.t2"), + List("default.t1"), + List( + ("default.t1.a", Set()), + ("default.t1.b", Set("default.t2.b")), + ("default.t1.c", Set("default.t2.c")), + ("default.t1.d", Set("default.t2.d"))))) + + val ret2 = exectractLineage("insert into t1 select 1, t2.b, dd.pos, dd.action1 " + + "from t2 " + + "lateral view posexplode(split(d,'\\},\\{')) dd as pos, action1") + assert(ret2 == Lineage( + List("default.t2"), + List("default.t1"), + List( + ("default.t1.a", Set()), + ("default.t1.b", Set("default.t2.b")), + ("default.t1.c", Set("default.t2.d")), + ("default.t1.d", Set("default.t2.d"))))) } } @@ -1098,15 +1352,14 @@ class SparkSQLLineageParserHelperSuite extends KyuubiFunSuite val parsed = spark.sessionState.sqlParser.parsePlan(sql) val analyzed = spark.sessionState.analyzer.execute(parsed) spark.sessionState.analyzer.checkAnalysis(analyzed) - val optimized = spark.sessionState.optimizer.execute(analyzed) - SparkSQLLineageParseHelper(spark).transformToLineage(0, optimized).get + SparkSQLLineageParseHelper(spark).transformToLineage(0, analyzed).get } private def exectractLineage(sql: String): Lineage = { val parsed = spark.sessionState.sqlParser.parsePlan(sql) val qe = spark.sessionState.executePlan(parsed) - val optimized = qe.optimizedPlan - SparkSQLLineageParseHelper(spark).transformToLineage(0, optimized).get + val analyzed = qe.analyzed + SparkSQLLineageParseHelper(spark).transformToLineage(0, analyzed).get } } diff --git a/externals/kyuubi-chat-engine/pom.xml b/externals/kyuubi-chat-engine/pom.xml new file mode 100644 index 00000000000..28779f4504f --- /dev/null +++ b/externals/kyuubi-chat-engine/pom.xml @@ -0,0 +1,90 @@ + + + + 4.0.0 + + org.apache.kyuubi + kyuubi-parent + 1.8.0-SNAPSHOT + ../../pom.xml + + + kyuubi-chat-engine_2.12 + jar + Kyuubi Project Engine Chat + https://kyuubi.apache.org/ + + + + + org.apache.kyuubi + kyuubi-common_${scala.binary.version} + ${project.version} + + + + org.apache.kyuubi + kyuubi-ha_${scala.binary.version} + ${project.version} + + + + com.theokanning.openai-gpt3-java + service + ${openai.java.version} + + + + org.apache.kyuubi + kyuubi-common_${scala.binary.version} + ${project.version} + test-jar + test + + + + org.apache.kyuubi + ${hive.jdbc.artifact} + ${project.version} + test + + + + + + + + org.apache.maven.plugins + maven-jar-plugin + + + prepare-test-jar + + test-jar + + test-compile + + + + + target/scala-${scala.binary.version}/classes + target/scala-${scala.binary.version}/test-classes + + + diff --git a/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/ChatBackendService.scala b/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/ChatBackendService.scala new file mode 100644 index 00000000000..fdc710e2ccd --- /dev/null +++ b/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/ChatBackendService.scala @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kyuubi.engine.chat + +import org.apache.kyuubi.engine.chat.session.ChatSessionManager +import org.apache.kyuubi.service.AbstractBackendService +import org.apache.kyuubi.session.SessionManager + +class ChatBackendService + extends AbstractBackendService("ChatBackendService") { + + override val sessionManager: SessionManager = new ChatSessionManager() + +} diff --git a/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/ChatEngine.scala b/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/ChatEngine.scala new file mode 100644 index 00000000000..c1fdea9538c --- /dev/null +++ b/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/ChatEngine.scala @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kyuubi.engine.chat + +import ChatEngine.currentEngine + +import org.apache.kyuubi.{Logging, Utils} +import org.apache.kyuubi.Utils.{addShutdownHook, JDBC_ENGINE_SHUTDOWN_PRIORITY} +import org.apache.kyuubi.config.KyuubiConf +import org.apache.kyuubi.ha.HighAvailabilityConf.HA_ZK_CONN_RETRY_POLICY +import org.apache.kyuubi.ha.client.RetryPolicies +import org.apache.kyuubi.service.Serverable +import org.apache.kyuubi.util.SignalRegister + +class ChatEngine extends Serverable("ChatEngine") { + + override val backendService = new ChatBackendService() + override val frontendServices = Seq(new ChatTBinaryFrontendService(this)) + + override def start(): Unit = { + super.start() + // Start engine self-terminating checker after all services are ready and it can be reached by + // all servers in engine spaces. + backendService.sessionManager.startTerminatingChecker(() => { + currentEngine.foreach(_.stop()) + }) + } + + override protected def stopServer(): Unit = {} +} + +object ChatEngine extends Logging { + + val kyuubiConf: KyuubiConf = KyuubiConf() + + var currentEngine: Option[ChatEngine] = None + + def startEngine(): Unit = { + currentEngine = Some(new ChatEngine()) + currentEngine.foreach { engine => + engine.initialize(kyuubiConf) + engine.start() + addShutdownHook( + () => { + engine.stop() + }, + JDBC_ENGINE_SHUTDOWN_PRIORITY + 1) + } + } + + def main(args: Array[String]): Unit = { + SignalRegister.registerLogger(logger) + + try { + Utils.fromCommandLineArgs(args, kyuubiConf) + kyuubiConf.setIfMissing(KyuubiConf.FRONTEND_THRIFT_BINARY_BIND_PORT, 0) + kyuubiConf.setIfMissing(HA_ZK_CONN_RETRY_POLICY, RetryPolicies.N_TIME.toString) + + startEngine() + } catch { + case t: Throwable if currentEngine.isDefined => + currentEngine.foreach { engine => + engine.stop() + } + error("Failed to create Chat Engine", t) + throw t + case t: Throwable => + error("Failed to create Chat Engine.", t) + throw t + } + } +} diff --git a/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/ChatTBinaryFrontendService.scala b/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/ChatTBinaryFrontendService.scala new file mode 100644 index 00000000000..80702c97c3c --- /dev/null +++ b/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/ChatTBinaryFrontendService.scala @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kyuubi.engine.chat + +import org.apache.kyuubi.ha.client.{EngineServiceDiscovery, ServiceDiscovery} +import org.apache.kyuubi.service.{Serverable, Service, TBinaryFrontendService} + +class ChatTBinaryFrontendService(override val serverable: Serverable) + extends TBinaryFrontendService("ChatTBinaryFrontend") { + + /** + * An optional `ServiceDiscovery` for [[FrontendService]] to expose itself + */ + override lazy val discoveryService: Option[Service] = + if (ServiceDiscovery.supportServiceDiscovery(conf)) { + Some(new EngineServiceDiscovery(this)) + } else { + None + } +} diff --git a/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/operation/ChatOperation.scala b/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/operation/ChatOperation.scala new file mode 100644 index 00000000000..38527cbf1f8 --- /dev/null +++ b/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/operation/ChatOperation.scala @@ -0,0 +1,100 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kyuubi.engine.chat.operation + +import org.apache.hive.service.rpc.thrift._ + +import org.apache.kyuubi.{KyuubiSQLException, Utils} +import org.apache.kyuubi.config.KyuubiConf +import org.apache.kyuubi.engine.chat.schema.{RowSet, SchemaHelper} +import org.apache.kyuubi.operation.{AbstractOperation, FetchIterator, OperationState} +import org.apache.kyuubi.operation.FetchOrientation.{FETCH_FIRST, FETCH_NEXT, FETCH_PRIOR, FetchOrientation} +import org.apache.kyuubi.session.Session + +abstract class ChatOperation(session: Session) extends AbstractOperation(session) { + + protected var iter: FetchIterator[Array[String]] = _ + + protected lazy val conf: KyuubiConf = session.sessionManager.getConf + + override def getNextRowSet(order: FetchOrientation, rowSetSize: Int): TRowSet = { + validateDefaultFetchOrientation(order) + assertState(OperationState.FINISHED) + setHasResultSet(true) + order match { + case FETCH_NEXT => + iter.fetchNext() + case FETCH_PRIOR => + iter.fetchPrior(rowSetSize) + case FETCH_FIRST => + iter.fetchAbsolute(0) + } + + val taken = iter.take(rowSetSize) + val resultRowSet = RowSet.toTRowSet(taken.toSeq, 1, getProtocolVersion) + resultRowSet.setStartRowOffset(iter.getPosition) + resultRowSet + } + + override def cancel(): Unit = { + cleanup(OperationState.CANCELED) + } + + override def close(): Unit = { + cleanup(OperationState.CLOSED) + } + + protected def onError(cancel: Boolean = false): PartialFunction[Throwable, Unit] = { + // We should use Throwable instead of Exception since `java.lang.NoClassDefFoundError` + // could be thrown. + case e: Throwable => + state.synchronized { + val errMsg = Utils.stringifyException(e) + if (state == OperationState.TIMEOUT) { + val ke = KyuubiSQLException(s"Timeout operating $opType: $errMsg") + setOperationException(ke) + throw ke + } else if (isTerminalState(state)) { + setOperationException(KyuubiSQLException(errMsg)) + warn(s"Ignore exception in terminal state with $statementId: $errMsg") + } else { + error(s"Error operating $opType: $errMsg", e) + val ke = KyuubiSQLException(s"Error operating $opType: $errMsg", e) + setOperationException(ke) + setState(OperationState.ERROR) + throw ke + } + } + } + + override protected def beforeRun(): Unit = { + setState(OperationState.PENDING) + setHasResultSet(true) + } + + override protected def afterRun(): Unit = {} + + override def getResultSetMetadata: TGetResultSetMetadataResp = { + val tTableSchema = SchemaHelper.stringTTableSchema("reply") + val resp = new TGetResultSetMetadataResp + resp.setSchema(tTableSchema) + resp.setStatus(OK_STATUS) + resp + } + + override def shouldRunAsync: Boolean = false +} diff --git a/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/operation/ChatOperationManager.scala b/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/operation/ChatOperationManager.scala new file mode 100644 index 00000000000..1e89165176e --- /dev/null +++ b/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/operation/ChatOperationManager.scala @@ -0,0 +1,130 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kyuubi.engine.chat.operation + +import java.util + +import org.apache.kyuubi.KyuubiSQLException +import org.apache.kyuubi.config.KyuubiConf +import org.apache.kyuubi.engine.chat.provider.ChatProvider +import org.apache.kyuubi.operation.{Operation, OperationManager} +import org.apache.kyuubi.session.Session + +class ChatOperationManager( + conf: KyuubiConf, + chatProvider: ChatProvider) extends OperationManager("ChatOperationManager") { + + override def newExecuteStatementOperation( + session: Session, + statement: String, + confOverlay: Map[String, String], + runAsync: Boolean, + queryTimeout: Long): Operation = { + val executeStatement = + new ExecuteStatement( + session, + statement, + runAsync, + queryTimeout, + chatProvider) + addOperation(executeStatement) + } + + override def newGetTypeInfoOperation(session: Session): Operation = { + throw KyuubiSQLException.featureNotSupported() + } + + override def newGetCatalogsOperation(session: Session): Operation = { + throw KyuubiSQLException.featureNotSupported() + } + + override def newGetSchemasOperation( + session: Session, + catalog: String, + schema: String): Operation = { + throw KyuubiSQLException.featureNotSupported() + } + + override def newGetTablesOperation( + session: Session, + catalogName: String, + schemaName: String, + tableName: String, + tableTypes: util.List[String]): Operation = { + throw KyuubiSQLException.featureNotSupported() + } + + override def newGetTableTypesOperation(session: Session): Operation = { + throw KyuubiSQLException.featureNotSupported() + } + + override def newGetColumnsOperation( + session: Session, + catalogName: String, + schemaName: String, + tableName: String, + columnName: String): Operation = { + throw KyuubiSQLException.featureNotSupported() + } + + override def newGetFunctionsOperation( + session: Session, + catalogName: String, + schemaName: String, + functionName: String): Operation = { + throw KyuubiSQLException.featureNotSupported() + } + + override def newGetPrimaryKeysOperation( + session: Session, + catalogName: String, + schemaName: String, + tableName: String): Operation = { + throw KyuubiSQLException.featureNotSupported() + } + + override def newGetCrossReferenceOperation( + session: Session, + primaryCatalog: String, + primarySchema: String, + primaryTable: String, + foreignCatalog: String, + foreignSchema: String, + foreignTable: String): Operation = { + throw KyuubiSQLException.featureNotSupported() + } + + override def getQueryId(operation: Operation): String = { + throw KyuubiSQLException.featureNotSupported() + } + + override def newSetCurrentCatalogOperation(session: Session, catalog: String): Operation = { + throw KyuubiSQLException.featureNotSupported() + } + + override def newGetCurrentCatalogOperation(session: Session): Operation = { + throw KyuubiSQLException.featureNotSupported() + } + + override def newSetCurrentDatabaseOperation(session: Session, database: String): Operation = { + throw KyuubiSQLException.featureNotSupported() + } + + override def newGetCurrentDatabaseOperation(session: Session): Operation = { + throw KyuubiSQLException.featureNotSupported() + } +} diff --git a/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/operation/ExecuteStatement.scala b/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/operation/ExecuteStatement.scala new file mode 100644 index 00000000000..754a519324f --- /dev/null +++ b/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/operation/ExecuteStatement.scala @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kyuubi.engine.chat.operation + +import org.apache.kyuubi.Logging +import org.apache.kyuubi.engine.chat.provider.ChatProvider +import org.apache.kyuubi.operation.{ArrayFetchIterator, OperationState} +import org.apache.kyuubi.operation.log.OperationLog +import org.apache.kyuubi.session.Session + +class ExecuteStatement( + session: Session, + override val statement: String, + override val shouldRunAsync: Boolean, + queryTimeout: Long, + chatProvider: ChatProvider) + extends ChatOperation(session) with Logging { + + private val operationLog: OperationLog = OperationLog.createOperationLog(session, getHandle) + override def getOperationLog: Option[OperationLog] = Option(operationLog) + + override protected def runInternal(): Unit = { + addTimeoutMonitor(queryTimeout) + if (shouldRunAsync) { + val asyncOperation = new Runnable { + override def run(): Unit = { + executeStatement() + } + } + val chatSessionManager = session.sessionManager + val backgroundHandle = chatSessionManager.submitBackgroundOperation(asyncOperation) + setBackgroundHandle(backgroundHandle) + } else { + executeStatement() + } + } + + private def executeStatement(): Unit = { + setState(OperationState.RUNNING) + + try { + val reply = chatProvider.ask(session.handle.identifier.toString, statement) + iter = new ArrayFetchIterator(Array(Array(reply))) + + setState(OperationState.FINISHED) + } catch { + onError(true) + } finally { + shutdownTimeoutMonitor() + } + } +} diff --git a/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/provider/ChatGPTProvider.scala b/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/provider/ChatGPTProvider.scala new file mode 100644 index 00000000000..cdea89d2aad --- /dev/null +++ b/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/provider/ChatGPTProvider.scala @@ -0,0 +1,96 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.engine.chat.provider + +import java.net.{InetSocketAddress, Proxy, URL} +import java.time.Duration +import java.util +import java.util.concurrent.TimeUnit + +import scala.collection.JavaConverters._ + +import com.google.common.cache.{CacheBuilder, CacheLoader, LoadingCache} +import com.theokanning.openai.OpenAiApi +import com.theokanning.openai.completion.chat.{ChatCompletionRequest, ChatMessage} +import com.theokanning.openai.service.OpenAiService +import com.theokanning.openai.service.OpenAiService.{defaultClient, defaultObjectMapper, defaultRetrofit} + +import org.apache.kyuubi.config.KyuubiConf + +class ChatGPTProvider(conf: KyuubiConf) extends ChatProvider { + + private val gptApiKey = conf.get(KyuubiConf.ENGINE_CHAT_GPT_API_KEY).getOrElse { + throw new IllegalArgumentException( + s"'${KyuubiConf.ENGINE_CHAT_GPT_API_KEY.key}' must be configured, " + + s"which could be got at https://platform.openai.com/account/api-keys") + } + + private val openAiService: OpenAiService = { + val builder = defaultClient( + gptApiKey, + Duration.ofMillis(conf.get(KyuubiConf.ENGINE_CHAT_GPT_HTTP_SOCKET_TIMEOUT))) + .newBuilder + .connectTimeout(Duration.ofMillis(conf.get(KyuubiConf.ENGINE_CHAT_GPT_HTTP_CONNECT_TIMEOUT))) + + conf.get(KyuubiConf.ENGINE_CHAT_GPT_HTTP_PROXY) match { + case Some(httpProxyUrl) => + val url = new URL(httpProxyUrl) + val proxy = new Proxy(Proxy.Type.HTTP, new InetSocketAddress(url.getHost, url.getPort)) + builder.proxy(proxy) + case _ => + } + + val retrofit = defaultRetrofit(builder.build(), defaultObjectMapper) + val api = retrofit.create(classOf[OpenAiApi]) + new OpenAiService(api) + } + + private val chatHistory: LoadingCache[String, util.ArrayDeque[ChatMessage]] = + CacheBuilder.newBuilder() + .expireAfterWrite(10, TimeUnit.MINUTES) + .build(new CacheLoader[String, util.ArrayDeque[ChatMessage]] { + override def load(sessionId: String): util.ArrayDeque[ChatMessage] = + new util.ArrayDeque[ChatMessage] + }) + + override def open(sessionId: String): Unit = { + chatHistory.getIfPresent(sessionId) + } + + override def ask(sessionId: String, q: String): String = { + val messages = chatHistory.get(sessionId) + try { + messages.addLast(new ChatMessage("user", q)) + val completionRequest = ChatCompletionRequest.builder() + .model(conf.get(KyuubiConf.ENGINE_CHAT_GPT_MODEL)) + .messages(messages.asScala.toList.asJava) + .build() + val responseText = openAiService.createChatCompletion(completionRequest).getChoices.asScala + .map(c => c.getMessage.getContent).mkString + responseText + } catch { + case e: Throwable => + messages.removeLast() + s"Chat failed. Error: ${e.getMessage}" + } + } + + override def close(sessionId: String): Unit = { + chatHistory.invalidate(sessionId) + } +} diff --git a/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/provider/ChatProvider.scala b/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/provider/ChatProvider.scala new file mode 100644 index 00000000000..af1ba434bea --- /dev/null +++ b/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/provider/ChatProvider.scala @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.engine.chat.provider + +import scala.util.control.NonFatal + +import com.fasterxml.jackson.databind.ObjectMapper +import com.fasterxml.jackson.module.scala.{ClassTagExtensions, DefaultScalaModule} + +import org.apache.kyuubi.{KyuubiException, Logging} +import org.apache.kyuubi.config.KyuubiConf +import org.apache.kyuubi.reflection.DynConstructors + +trait ChatProvider { + + def open(sessionId: String): Unit + + def ask(sessionId: String, q: String): String + + def close(sessionId: String): Unit +} + +object ChatProvider extends Logging { + + val mapper: ObjectMapper with ClassTagExtensions = + new ObjectMapper().registerModule(DefaultScalaModule) :: ClassTagExtensions + + def load(conf: KyuubiConf): ChatProvider = { + val groupProviderClass = conf.get(KyuubiConf.ENGINE_CHAT_PROVIDER) + try { + DynConstructors.builder(classOf[ChatProvider]) + .impl(groupProviderClass, classOf[KyuubiConf]) + .impl(groupProviderClass) + .buildChecked + .newInstanceChecked(conf) + } catch { + case _: ClassCastException => + throw new KyuubiException( + s"Class $groupProviderClass is not a child of '${classOf[ChatProvider].getName}'.") + case NonFatal(e) => + throw new IllegalArgumentException(s"Error while instantiating '$groupProviderClass': ", e) + } + } +} diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/trino/api/KyuubiScalaObjectMapper.scala b/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/provider/EchoProvider.scala similarity index 66% rename from kyuubi-server/src/main/scala/org/apache/kyuubi/server/trino/api/KyuubiScalaObjectMapper.scala rename to externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/provider/EchoProvider.scala index 915b109b7b9..31ad3b8e390 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/trino/api/KyuubiScalaObjectMapper.scala +++ b/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/provider/EchoProvider.scala @@ -15,15 +15,14 @@ * limitations under the License. */ -package org.apache.kyuubi.server.trino.api +package org.apache.kyuubi.engine.chat.provider -import javax.ws.rs.ext.ContextResolver +class EchoProvider extends ChatProvider { -import com.fasterxml.jackson.databind.ObjectMapper -import com.fasterxml.jackson.module.scala.DefaultScalaModule + override def open(sessionId: String): Unit = {} -class KyuubiScalaObjectMapper extends ContextResolver[ObjectMapper] { - private val mapper = new ObjectMapper().registerModule(DefaultScalaModule) + override def ask(sessionId: String, q: String): String = + "This is ChatKyuubi, nice to meet you!" - override def getContext(aClass: Class[_]): ObjectMapper = mapper + override def close(sessionId: String): Unit = {} } diff --git a/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/provider/Message.scala b/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/provider/Message.scala new file mode 100644 index 00000000000..e2162be9f1a --- /dev/null +++ b/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/provider/Message.scala @@ -0,0 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.engine.chat.provider + +case class Message(role: String, content: String) diff --git a/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/schema/RowSet.scala b/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/schema/RowSet.scala new file mode 100644 index 00000000000..3bb4ba7dfa9 --- /dev/null +++ b/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/schema/RowSet.scala @@ -0,0 +1,108 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.engine.chat.schema + +import java.util + +import org.apache.hive.service.rpc.thrift._ + +import org.apache.kyuubi.util.RowSetUtils._ + +object RowSet { + + def emptyTRowSet(): TRowSet = { + new TRowSet(0, new java.util.ArrayList[TRow](0)) + } + + def toTRowSet( + rows: Seq[Array[String]], + columnSize: Int, + protocolVersion: TProtocolVersion): TRowSet = { + if (protocolVersion.getValue < TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V6.getValue) { + toRowBasedSet(rows, columnSize) + } else { + toColumnBasedSet(rows, columnSize) + } + } + + def toRowBasedSet(rows: Seq[Array[String]], columnSize: Int): TRowSet = { + val rowSize = rows.length + val tRows = new java.util.ArrayList[TRow](rowSize) + var i = 0 + while (i < rowSize) { + val row = rows(i) + val tRow = new TRow() + var j = 0 + val columnSize = row.length + while (j < columnSize) { + val columnValue = stringTColumnValue(j, row) + tRow.addToColVals(columnValue) + j += 1 + } + i += 1 + tRows.add(tRow) + } + new TRowSet(0, tRows) + } + + def toColumnBasedSet(rows: Seq[Array[String]], columnSize: Int): TRowSet = { + val rowSize = rows.length + val tRowSet = new TRowSet(0, new util.ArrayList[TRow](rowSize)) + var i = 0 + while (i < columnSize) { + val tColumn = toTColumn(rows, i) + tRowSet.addToColumns(tColumn) + i += 1 + } + tRowSet + } + + private def toTColumn(rows: Seq[Array[String]], ordinal: Int): TColumn = { + val nulls = new java.util.BitSet() + val values = getOrSetAsNull[String](rows, ordinal, nulls, "") + TColumn.stringVal(new TStringColumn(values, nulls)) + } + + private def getOrSetAsNull[String]( + rows: Seq[Array[String]], + ordinal: Int, + nulls: util.BitSet, + defaultVal: String): util.List[String] = { + val size = rows.length + val ret = new util.ArrayList[String](size) + var idx = 0 + while (idx < size) { + val row = rows(idx) + val isNull = row(ordinal) == null + if (isNull) { + nulls.set(idx, true) + ret.add(idx, defaultVal) + } else { + ret.add(idx, row(ordinal)) + } + idx += 1 + } + ret + } + + private def stringTColumnValue(ordinal: Int, row: Array[String]): TColumnValue = { + val tStringValue = new TStringValue + if (row(ordinal) != null) tStringValue.setValue(row(ordinal)) + TColumnValue.stringVal(tStringValue) + } +} diff --git a/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/schema/SchemaHelper.scala b/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/schema/SchemaHelper.scala new file mode 100644 index 00000000000..8ccfdda2fe9 --- /dev/null +++ b/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/schema/SchemaHelper.scala @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.engine.chat.schema + +import java.util.Collections + +import org.apache.hive.service.rpc.thrift._ + +object SchemaHelper { + + def stringTTypeQualifiers: TTypeQualifiers = { + val ret = new TTypeQualifiers() + val qualifiers = Collections.emptyMap[String, TTypeQualifierValue]() + ret.setQualifiers(qualifiers) + ret + } + + def stringTTypeDesc: TTypeDesc = { + val typeEntry = new TPrimitiveTypeEntry(TTypeId.STRING_TYPE) + typeEntry.setTypeQualifiers(stringTTypeQualifiers) + val tTypeDesc = new TTypeDesc() + tTypeDesc.addToTypes(TTypeEntry.primitiveEntry(typeEntry)) + tTypeDesc + } + + def stringTColumnDesc(fieldName: String, pos: Int): TColumnDesc = { + val tColumnDesc = new TColumnDesc() + tColumnDesc.setColumnName(fieldName) + tColumnDesc.setTypeDesc(stringTTypeDesc) + tColumnDesc.setPosition(pos) + tColumnDesc + } + + def stringTTableSchema(fieldsName: String*): TTableSchema = { + val tTableSchema = new TTableSchema() + fieldsName.zipWithIndex.foreach { case (f, i) => + tTableSchema.addToColumns(stringTColumnDesc(f, i)) + } + tTableSchema + } +} diff --git a/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/session/ChatSessionImpl.scala b/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/session/ChatSessionImpl.scala new file mode 100644 index 00000000000..29f42076822 --- /dev/null +++ b/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/session/ChatSessionImpl.scala @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kyuubi.engine.chat.session + +import org.apache.hive.service.rpc.thrift.{TGetInfoType, TGetInfoValue, TProtocolVersion} + +import org.apache.kyuubi.{KYUUBI_VERSION, KyuubiSQLException} +import org.apache.kyuubi.config.KyuubiReservedKeys.KYUUBI_SESSION_HANDLE_KEY +import org.apache.kyuubi.session.{AbstractSession, SessionHandle, SessionManager} + +class ChatSessionImpl( + protocol: TProtocolVersion, + user: String, + password: String, + ipAddress: String, + conf: Map[String, String], + sessionManager: SessionManager) + extends AbstractSession(protocol, user, password, ipAddress, conf, sessionManager) { + + override val handle: SessionHandle = + conf.get(KYUUBI_SESSION_HANDLE_KEY).map(SessionHandle.fromUUID).getOrElse(SessionHandle()) + + private val chatProvider = sessionManager.asInstanceOf[ChatSessionManager].chatProvider + + override def open(): Unit = { + info(s"Starting to open chat session.") + chatProvider.open(handle.identifier.toString) + super.open() + info(s"The chat session is started.") + } + + override def getInfo(infoType: TGetInfoType): TGetInfoValue = withAcquireRelease() { + infoType match { + case TGetInfoType.CLI_SERVER_NAME | TGetInfoType.CLI_DBMS_NAME => + TGetInfoValue.stringValue("Kyuubi Chat Engine") + case TGetInfoType.CLI_DBMS_VER => + TGetInfoValue.stringValue(KYUUBI_VERSION) + case TGetInfoType.CLI_ODBC_KEYWORDS => TGetInfoValue.stringValue("Unimplemented") + case TGetInfoType.CLI_MAX_COLUMN_NAME_LEN => + TGetInfoValue.lenValue(128) + case TGetInfoType.CLI_MAX_SCHEMA_NAME_LEN => + TGetInfoValue.lenValue(128) + case TGetInfoType.CLI_MAX_TABLE_NAME_LEN => + TGetInfoValue.lenValue(128) + case _ => throw KyuubiSQLException(s"Unrecognized GetInfoType value: $infoType") + } + } + + override def close(): Unit = { + chatProvider.close(handle.identifier.toString) + super.close() + } + +} diff --git a/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/session/ChatSessionManager.scala b/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/session/ChatSessionManager.scala new file mode 100644 index 00000000000..33a9dd45066 --- /dev/null +++ b/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/session/ChatSessionManager.scala @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kyuubi.engine.chat.session + +import org.apache.hive.service.rpc.thrift.TProtocolVersion + +import org.apache.kyuubi.config.KyuubiConf +import org.apache.kyuubi.config.KyuubiConf.ENGINE_SHARE_LEVEL +import org.apache.kyuubi.config.KyuubiReservedKeys.KYUUBI_SESSION_HANDLE_KEY +import org.apache.kyuubi.engine.ShareLevel +import org.apache.kyuubi.engine.chat.ChatEngine +import org.apache.kyuubi.engine.chat.operation.ChatOperationManager +import org.apache.kyuubi.engine.chat.provider.ChatProvider +import org.apache.kyuubi.operation.OperationManager +import org.apache.kyuubi.session.{Session, SessionHandle, SessionManager} + +class ChatSessionManager(name: String) + extends SessionManager(name) { + + def this() = this(classOf[ChatSessionManager].getSimpleName) + + override protected def isServer: Boolean = false + + lazy val chatProvider: ChatProvider = ChatProvider.load(conf) + + override lazy val operationManager: OperationManager = + new ChatOperationManager(conf, chatProvider) + + override def initialize(conf: KyuubiConf): Unit = { + this.conf = conf + super.initialize(conf) + } + + override protected def createSession( + protocol: TProtocolVersion, + user: String, + password: String, + ipAddress: String, + conf: Map[String, String]): Session = { + conf.get(KYUUBI_SESSION_HANDLE_KEY).map(SessionHandle.fromUUID) + .flatMap(getSessionOption).getOrElse { + new ChatSessionImpl(protocol, user, password, ipAddress, conf, this) + } + } + + override def closeSession(sessionHandle: SessionHandle): Unit = { + super.closeSession(sessionHandle) + if (conf.get(ENGINE_SHARE_LEVEL) == ShareLevel.CONNECTION.toString) { + info("Session stopped due to shared level is Connection.") + stopSession() + } + } + + private def stopSession(): Unit = { + ChatEngine.currentEngine.foreach(_.stop()) + } +} diff --git a/externals/kyuubi-chat-engine/src/test/resources/log4j2-test.xml b/externals/kyuubi-chat-engine/src/test/resources/log4j2-test.xml new file mode 100644 index 00000000000..585a12c6f99 --- /dev/null +++ b/externals/kyuubi-chat-engine/src/test/resources/log4j2-test.xml @@ -0,0 +1,42 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/externals/kyuubi-chat-engine/src/test/scala/org/apache/kyuubi/engine/chat/WithChatEngine.scala b/externals/kyuubi-chat-engine/src/test/scala/org/apache/kyuubi/engine/chat/WithChatEngine.scala new file mode 100644 index 00000000000..287fdde2fb5 --- /dev/null +++ b/externals/kyuubi-chat-engine/src/test/scala/org/apache/kyuubi/engine/chat/WithChatEngine.scala @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kyuubi.engine.chat + +import org.apache.kyuubi.KyuubiFunSuite +import org.apache.kyuubi.config.KyuubiConf + +trait WithChatEngine extends KyuubiFunSuite { + + protected var engine: ChatEngine = _ + protected var connectionUrl: String = _ + + protected val kyuubiConf: KyuubiConf = ChatEngine.kyuubiConf + + def withKyuubiConf: Map[String, String] + + override def beforeAll(): Unit = { + super.beforeAll() + startChatEngine() + } + + override def afterAll(): Unit = { + stopChatEngine() + super.afterAll() + } + + def stopChatEngine(): Unit = { + if (engine != null) { + engine.stop() + engine = null + } + } + + def startChatEngine(): Unit = { + withKyuubiConf.foreach { case (k, v) => + System.setProperty(k, v) + kyuubiConf.set(k, v) + } + ChatEngine.startEngine() + engine = ChatEngine.currentEngine.get + connectionUrl = engine.frontendServices.head.connectionUrl + } + + protected def jdbcConnectionUrl: String = s"jdbc:hive2://$connectionUrl/;" + +} diff --git a/externals/kyuubi-chat-engine/src/test/scala/org/apache/kyuubi/engine/chat/operation/ChatOperationSuite.scala b/externals/kyuubi-chat-engine/src/test/scala/org/apache/kyuubi/engine/chat/operation/ChatOperationSuite.scala new file mode 100644 index 00000000000..b14407a267b --- /dev/null +++ b/externals/kyuubi-chat-engine/src/test/scala/org/apache/kyuubi/engine/chat/operation/ChatOperationSuite.scala @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.engine.chat.operation + +import org.apache.kyuubi.config.KyuubiConf._ +import org.apache.kyuubi.engine.chat.WithChatEngine +import org.apache.kyuubi.operation.HiveJDBCTestHelper + +class ChatOperationSuite extends HiveJDBCTestHelper with WithChatEngine { + + override def withKyuubiConf: Map[String, String] = Map( + ENGINE_CHAT_PROVIDER.key -> "echo") + + override protected def jdbcUrl: String = jdbcConnectionUrl + + test("test echo chat provider") { + withJdbcStatement() { stmt => + val result = stmt.executeQuery("Hello, Kyuubi") + assert(result.next()) + val expected = "This is ChatKyuubi, nice to meet you!" + assert(result.getString("reply") === expected) + assert(!result.next()) + } + } +} diff --git a/externals/kyuubi-download/pom.xml b/externals/kyuubi-download/pom.xml index b0479f7edc8..d7f0c601322 100644 --- a/externals/kyuubi-download/pom.xml +++ b/externals/kyuubi-download/pom.xml @@ -21,7 +21,7 @@ org.apache.kyuubi kyuubi-parent - 1.7.0-SNAPSHOT + 1.8.0-SNAPSHOT ../../pom.xml @@ -36,6 +36,7 @@ com.googlecode.maven-download-plugin download-maven-plugin + ${maven.plugin.download.cache.path} ${project.build.directory} 60000 3 diff --git a/externals/kyuubi-flink-sql-engine/pom.xml b/externals/kyuubi-flink-sql-engine/pom.xml index c939936070b..f3633b904f5 100644 --- a/externals/kyuubi-flink-sql-engine/pom.xml +++ b/externals/kyuubi-flink-sql-engine/pom.xml @@ -21,7 +21,7 @@ org.apache.kyuubi kyuubi-parent - 1.7.0-SNAPSHOT + 1.8.0-SNAPSHOT ../../pom.xml @@ -59,19 +59,19 @@ org.apache.flink - flink-streaming-java${flink.module.scala.suffix} + flink-streaming-java provided org.apache.flink - flink-clients${flink.module.scala.suffix} + flink-clients provided org.apache.flink - flink-sql-client${flink.module.scala.suffix} + flink-sql-client provided @@ -89,7 +89,7 @@ org.apache.flink - flink-table-api-java-bridge${flink.module.scala.suffix} + flink-table-api-java-bridge provided @@ -101,7 +101,7 @@ org.apache.flink - flink-table-runtime${flink.module.scala.suffix} + flink-table-runtime provided @@ -128,7 +128,7 @@ org.apache.flink - flink-test-utils${flink.module.scala.suffix} + flink-test-utils test diff --git a/externals/kyuubi-flink-sql-engine/src/main/java/org/apache/kyuubi/engine/flink/result/Constants.java b/externals/kyuubi-flink-sql-engine/src/main/java/org/apache/kyuubi/engine/flink/result/Constants.java deleted file mode 100644 index b683eb76afa..00000000000 --- a/externals/kyuubi-flink-sql-engine/src/main/java/org/apache/kyuubi/engine/flink/result/Constants.java +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.kyuubi.engine.flink.result; - -/** Constant column names. */ -public class Constants { - - public static final String TABLE_TYPE = "TABLE"; - public static final String VIEW_TYPE = "VIEW"; - - public static final String[] SUPPORTED_TABLE_TYPES = new String[] {TABLE_TYPE, VIEW_TYPE}; -} diff --git a/externals/kyuubi-flink-sql-engine/src/main/java/org/apache/kyuubi/engine/flink/result/ResultSet.java b/externals/kyuubi-flink-sql-engine/src/main/java/org/apache/kyuubi/engine/flink/result/ResultSet.java deleted file mode 100644 index 66f03a159b9..00000000000 --- a/externals/kyuubi-flink-sql-engine/src/main/java/org/apache/kyuubi/engine/flink/result/ResultSet.java +++ /dev/null @@ -1,178 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.kyuubi.engine.flink.result; - -import com.google.common.collect.Iterators; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Iterator; -import java.util.List; -import java.util.Objects; -import javax.annotation.Nullable; -import org.apache.flink.table.api.ResultKind; -import org.apache.flink.table.api.TableResult; -import org.apache.flink.table.catalog.Column; -import org.apache.flink.table.catalog.ResolvedSchema; -import org.apache.flink.types.Row; -import org.apache.flink.util.Preconditions; -import org.apache.kyuubi.operation.ArrayFetchIterator; -import org.apache.kyuubi.operation.FetchIterator; - -/** - * A set of one statement execution result containing result kind, columns, rows of data and change - * flags for streaming mode. - */ -public class ResultSet { - - private final ResultKind resultKind; - private final List columns; - private final FetchIterator data; - - // null in batch mode - // - // list of boolean in streaming mode, - // true if the corresponding row is an append row, false if its a retract row - private final List changeFlags; - - private ResultSet( - ResultKind resultKind, - List columns, - FetchIterator data, - @Nullable List changeFlags) { - this.resultKind = Preconditions.checkNotNull(resultKind, "resultKind must not be null"); - this.columns = Preconditions.checkNotNull(columns, "columns must not be null"); - this.data = Preconditions.checkNotNull(data, "data must not be null"); - this.changeFlags = changeFlags; - if (changeFlags != null) { - Preconditions.checkArgument( - Iterators.size((Iterator) data) == changeFlags.size(), - "the size of data and the size of changeFlags should be equal"); - } - } - - public List getColumns() { - return columns; - } - - public FetchIterator getData() { - return data; - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - ResultSet resultSet = (ResultSet) o; - return resultKind.equals(resultSet.resultKind) - && columns.equals(resultSet.columns) - && data.equals(resultSet.data) - && Objects.equals(changeFlags, resultSet.changeFlags); - } - - @Override - public int hashCode() { - return Objects.hash(resultKind, columns, data, changeFlags); - } - - @Override - public String toString() { - return "ResultSet{" - + "resultKind=" - + resultKind - + ", columns=" - + columns - + ", data=" - + data - + ", changeFlags=" - + changeFlags - + '}'; - } - - public static ResultSet fromTableResult(TableResult tableResult) { - ResolvedSchema schema = tableResult.getResolvedSchema(); - // collect all rows from table result as list - // this is ok as TableResult contains limited rows - List rows = new ArrayList<>(); - tableResult.collect().forEachRemaining(rows::add); - return builder() - .resultKind(tableResult.getResultKind()) - .columns(schema.getColumns()) - .data(rows.toArray(new Row[0])) - .build(); - } - - public static Builder builder() { - return new Builder(); - } - - /** Builder for {@link ResultSet}. */ - public static class Builder { - private ResultKind resultKind = null; - private List columns = null; - private FetchIterator data = null; - private List changeFlags = null; - - private Builder() {} - - /** Set {@link ResultKind}. */ - public Builder resultKind(ResultKind resultKind) { - this.resultKind = resultKind; - return this; - } - - /** Set columns. */ - public Builder columns(Column... columns) { - this.columns = Arrays.asList(columns); - return this; - } - - /** Set columns. */ - public Builder columns(List columns) { - this.columns = columns; - return this; - } - - /** Set data. */ - public Builder data(FetchIterator data) { - this.data = data; - return this; - } - - /** Set data. */ - public Builder data(Row[] data) { - this.data = new ArrayFetchIterator<>(data); - return this; - } - - /** Set change flags. */ - public Builder changeFlags(List changeFlags) { - this.changeFlags = changeFlags; - return this; - } - - /** Returns a {@link ResultSet} instance. */ - public ResultSet build() { - return new ResultSet(resultKind, columns, data, changeFlags); - } - } -} diff --git a/externals/kyuubi-flink-sql-engine/src/main/scala/org/apache/kyuubi/engine/flink/FlinkEngineUtils.scala b/externals/kyuubi-flink-sql-engine/src/main/scala/org/apache/kyuubi/engine/flink/FlinkEngineUtils.scala index e271944a7c0..69fc8c69573 100644 --- a/externals/kyuubi-flink-sql-engine/src/main/scala/org/apache/kyuubi/engine/flink/FlinkEngineUtils.scala +++ b/externals/kyuubi-flink-sql-engine/src/main/scala/org/apache/kyuubi/engine/flink/FlinkEngineUtils.scala @@ -40,7 +40,7 @@ object FlinkEngineUtils extends Logging { val EMBEDDED_MODE_CLIENT_OPTIONS: Options = getEmbeddedModeClientOptions(new Options); val SUPPORTED_FLINK_VERSIONS: Array[SemanticVersion] = - Array("1.14", "1.15", "1.16").map(SemanticVersion.apply) + Array("1.15", "1.16").map(SemanticVersion.apply) def checkFlinkVersion(): Unit = { val flinkVersion = EnvironmentInformation.getVersion diff --git a/externals/kyuubi-flink-sql-engine/src/main/scala/org/apache/kyuubi/engine/flink/operation/ExecuteStatement.scala b/externals/kyuubi-flink-sql-engine/src/main/scala/org/apache/kyuubi/engine/flink/operation/ExecuteStatement.scala index 93d013556e1..0438b98d1ad 100644 --- a/externals/kyuubi-flink-sql-engine/src/main/scala/org/apache/kyuubi/engine/flink/operation/ExecuteStatement.scala +++ b/externals/kyuubi-flink-sql-engine/src/main/scala/org/apache/kyuubi/engine/flink/operation/ExecuteStatement.scala @@ -17,14 +17,14 @@ package org.apache.kyuubi.engine.flink.operation -import java.time.LocalDate +import java.time.{LocalDate, LocalTime} import java.util import scala.collection.JavaConverters._ import scala.collection.mutable.ArrayBuffer import org.apache.flink.api.common.JobID -import org.apache.flink.table.api.{ResultKind, TableResult} +import org.apache.flink.table.api.ResultKind import org.apache.flink.table.client.gateway.TypedResult import org.apache.flink.table.data.{GenericArrayData, GenericMapData, RowData} import org.apache.flink.table.data.binary.{BinaryArrayData, BinaryMapData} @@ -120,18 +120,8 @@ class ExecuteStatement( case TypedResult.ResultType.PAYLOAD => (1 to result.getPayload).foreach { page => if (rows.size < resultMaxRows) { - // FLINK-24461 retrieveResultPage method changes the return type from Row to RowData - val retrieveResultPage = DynMethods.builder("retrieveResultPage") - .impl(executor.getClass, classOf[String], classOf[Int]) - .build(executor) - val _page = Integer.valueOf(page) - if (isFlinkVersionEqualTo("1.14")) { - val result = retrieveResultPage.invoke[util.List[Row]](resultId, _page) - rows ++= result.asScala - } else if (isFlinkVersionAtLeast("1.15")) { - val result = retrieveResultPage.invoke[util.List[RowData]](resultId, _page) - rows ++= result.asScala.map(r => convertToRow(r, dataTypes)) - } + val result = executor.retrieveResultPage(resultId, page) + rows ++= result.asScala.map(r => convertToRow(r, dataTypes)) } else { loop = false } @@ -154,14 +144,10 @@ class ExecuteStatement( } private def runOperation(operation: Operation): Unit = { - // FLINK-24461 executeOperation method changes the return type - // from TableResult to TableResultInternal - val executeOperation = DynMethods.builder("executeOperation") - .impl(executor.getClass, classOf[String], classOf[Operation]) - .build(executor) - val result = executeOperation.invoke[TableResult](sessionId, operation) + val result = executor.executeOperation(sessionId, operation) jobId = result.getJobClient.asScala.map(_.getJobID) - result.await() + // after FLINK-24461, TableResult#await() would block insert statements + // until the job finishes, instead of returning row affected immediately resultSet = ResultSet.fromTableResult(result) } @@ -204,6 +190,9 @@ class ExecuteStatement( case _: DateType => val date = RowSetUtils.formatLocalDate(LocalDate.ofEpochDay(r.getInt(i))) row.setField(i, date) + case _: TimeType => + val time = RowSetUtils.formatLocalTime(LocalTime.ofNanoOfDay(r.getLong(i) * 1000 * 1000)) + row.setField(i, time) case t: TimestampType => val ts = RowSetUtils .formatLocalDateTime(r.getTimestamp(i, t.getPrecision) diff --git a/externals/kyuubi-flink-sql-engine/src/main/scala/org/apache/kyuubi/engine/flink/result/Constants.scala b/externals/kyuubi-flink-sql-engine/src/main/scala/org/apache/kyuubi/engine/flink/result/Constants.scala new file mode 100644 index 00000000000..ca582b2e3f3 --- /dev/null +++ b/externals/kyuubi-flink-sql-engine/src/main/scala/org/apache/kyuubi/engine/flink/result/Constants.scala @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.engine.flink.result + +object Constants { + val TABLE_TYPE: String = "TABLE" + val VIEW_TYPE: String = "VIEW" + val SUPPORTED_TABLE_TYPES: Array[String] = Array[String](TABLE_TYPE, VIEW_TYPE) +} diff --git a/externals/kyuubi-flink-sql-engine/src/main/scala/org/apache/kyuubi/engine/flink/result/ResultSet.scala b/externals/kyuubi-flink-sql-engine/src/main/scala/org/apache/kyuubi/engine/flink/result/ResultSet.scala new file mode 100644 index 00000000000..13673381258 --- /dev/null +++ b/externals/kyuubi-flink-sql-engine/src/main/scala/org/apache/kyuubi/engine/flink/result/ResultSet.scala @@ -0,0 +1,111 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.engine.flink.result + +import java.util + +import scala.collection.JavaConverters._ + +import com.google.common.collect.Iterators +import org.apache.flink.table.api.{ResultKind, TableResult} +import org.apache.flink.table.catalog.Column +import org.apache.flink.types.Row + +import org.apache.kyuubi.operation.{ArrayFetchIterator, FetchIterator} + +case class ResultSet( + resultKind: ResultKind, + columns: util.List[Column], + data: FetchIterator[Row], + // null in batch mode + // list of boolean in streaming mode, + // true if the corresponding row is an append row, false if its a retract row + changeFlags: Option[util.List[Boolean]]) { + + require(resultKind != null, "resultKind must not be null") + require(columns != null, "columns must not be null") + require(data != null, "data must not be null") + changeFlags.foreach { flags => + require( + Iterators.size(data.asInstanceOf[util.Iterator[_]]) == flags.size, + "the size of data and the size of changeFlags should be equal") + } + + def getColumns: util.List[Column] = columns + + def getData: FetchIterator[Row] = data +} + +/** + * A set of one statement execution result containing result kind, columns, rows of data and change + * flags for streaming mode. + */ +object ResultSet { + + def fromTableResult(tableResult: TableResult): ResultSet = { + val schema = tableResult.getResolvedSchema + // collect all rows from table result as list + // this is ok as TableResult contains limited rows + val rows = tableResult.collect.asScala.toArray + builder.resultKind(tableResult.getResultKind) + .columns(schema.getColumns) + .data(rows) + .build + } + + def builder: Builder = new ResultSet.Builder + + class Builder { + private var resultKind: ResultKind = _ + private var columns: util.List[Column] = _ + private var data: FetchIterator[Row] = _ + private var changeFlags: Option[util.List[Boolean]] = None + + def resultKind(resultKind: ResultKind): ResultSet.Builder = { + this.resultKind = resultKind + this + } + + def columns(columns: Column*): ResultSet.Builder = { + this.columns = columns.asJava + this + } + + def columns(columns: util.List[Column]): ResultSet.Builder = { + this.columns = columns + this + } + + def data(data: FetchIterator[Row]): ResultSet.Builder = { + this.data = data + this + } + + def data(data: Array[Row]): ResultSet.Builder = { + this.data = new ArrayFetchIterator[Row](data) + this + } + + def changeFlags(changeFlags: util.List[Boolean]): ResultSet.Builder = { + this.changeFlags = Some(changeFlags) + this + } + + def build: ResultSet = new ResultSet(resultKind, columns, data, changeFlags) + } +} diff --git a/externals/kyuubi-flink-sql-engine/src/main/scala/org/apache/kyuubi/engine/flink/schema/RowSet.scala b/externals/kyuubi-flink-sql-engine/src/main/scala/org/apache/kyuubi/engine/flink/schema/RowSet.scala index 2b3ae50b76e..ad83f9c2ba2 100644 --- a/externals/kyuubi-flink-sql-engine/src/main/scala/org/apache/kyuubi/engine/flink/schema/RowSet.scala +++ b/externals/kyuubi-flink-sql-engine/src/main/scala/org/apache/kyuubi/engine/flink/schema/RowSet.scala @@ -307,6 +307,7 @@ object RowSet { case _: MapType => TTypeId.MAP_TYPE case _: RowType => TTypeId.STRUCT_TYPE case _: BinaryType => TTypeId.BINARY_TYPE + case _: TimeType => TTypeId.STRING_TYPE case t @ (_: ZonedTimestampType | _: LocalZonedTimestampType | _: MultisetType | _: YearMonthIntervalType | _: DayTimeIntervalType) => throw new IllegalArgumentException( diff --git a/externals/kyuubi-flink-sql-engine/src/main/scala/org/apache/kyuubi/engine/flink/session/FlinkSQLSessionManager.scala b/externals/kyuubi-flink-sql-engine/src/main/scala/org/apache/kyuubi/engine/flink/session/FlinkSQLSessionManager.scala index 8a3fc7446cf..07971e39fae 100644 --- a/externals/kyuubi-flink-sql-engine/src/main/scala/org/apache/kyuubi/engine/flink/session/FlinkSQLSessionManager.scala +++ b/externals/kyuubi-flink-sql-engine/src/main/scala/org/apache/kyuubi/engine/flink/session/FlinkSQLSessionManager.scala @@ -21,6 +21,7 @@ import org.apache.flink.table.client.gateway.context.DefaultContext import org.apache.flink.table.client.gateway.local.LocalExecutor import org.apache.hive.service.rpc.thrift.TProtocolVersion +import org.apache.kyuubi.config.KyuubiReservedKeys.KYUUBI_SESSION_HANDLE_KEY import org.apache.kyuubi.engine.flink.operation.FlinkSQLOperationManager import org.apache.kyuubi.session.{Session, SessionHandle, SessionManager} @@ -43,14 +44,17 @@ class FlinkSQLSessionManager(engineContext: DefaultContext) password: String, ipAddress: String, conf: Map[String, String]): Session = { - new FlinkSessionImpl( - protocol, - user, - password, - ipAddress, - conf, - this, - executor) + conf.get(KYUUBI_SESSION_HANDLE_KEY).map(SessionHandle.fromUUID).flatMap( + getSessionOption).getOrElse { + new FlinkSessionImpl( + protocol, + user, + password, + ipAddress, + conf, + this, + executor) + } } override def closeSession(sessionHandle: SessionHandle): Unit = { diff --git a/externals/kyuubi-flink-sql-engine/src/main/scala/org/apache/kyuubi/engine/flink/session/FlinkSessionImpl.scala b/externals/kyuubi-flink-sql-engine/src/main/scala/org/apache/kyuubi/engine/flink/session/FlinkSessionImpl.scala index 03d9ce42e7f..75087b48ca2 100644 --- a/externals/kyuubi-flink-sql-engine/src/main/scala/org/apache/kyuubi/engine/flink/session/FlinkSessionImpl.scala +++ b/externals/kyuubi-flink-sql-engine/src/main/scala/org/apache/kyuubi/engine/flink/session/FlinkSessionImpl.scala @@ -26,8 +26,9 @@ import org.apache.flink.table.client.gateway.local.LocalExecutor import org.apache.hive.service.rpc.thrift.{TGetInfoType, TGetInfoValue, TProtocolVersion} import org.apache.kyuubi.KyuubiSQLException +import org.apache.kyuubi.config.KyuubiReservedKeys.KYUUBI_SESSION_HANDLE_KEY import org.apache.kyuubi.engine.flink.FlinkEngineUtils -import org.apache.kyuubi.session.{AbstractSession, SessionManager} +import org.apache.kyuubi.session.{AbstractSession, SessionHandle, SessionManager} class FlinkSessionImpl( protocol: TProtocolVersion, @@ -39,6 +40,9 @@ class FlinkSessionImpl( val executor: LocalExecutor) extends AbstractSession(protocol, user, password, ipAddress, conf, sessionManager) { + override val handle: SessionHandle = + conf.get(KYUUBI_SESSION_HANDLE_KEY).map(SessionHandle.fromUUID).getOrElse(SessionHandle()) + lazy val sessionContext: SessionContext = { FlinkEngineUtils.getSessionContext(executor, handle.identifier.toString) } diff --git a/externals/kyuubi-flink-sql-engine/src/test/scala/org/apache/kyuubi/engine/flink/operation/FlinkOperationSuite.scala b/externals/kyuubi-flink-sql-engine/src/test/scala/org/apache/kyuubi/engine/flink/operation/FlinkOperationSuite.scala index c75124c3947..5026fd41175 100644 --- a/externals/kyuubi-flink-sql-engine/src/test/scala/org/apache/kyuubi/engine/flink/operation/FlinkOperationSuite.scala +++ b/externals/kyuubi-flink-sql-engine/src/test/scala/org/apache/kyuubi/engine/flink/operation/FlinkOperationSuite.scala @@ -30,7 +30,6 @@ import org.scalatest.time.SpanSugar._ import org.apache.kyuubi.Utils import org.apache.kyuubi.config.KyuubiConf._ -import org.apache.kyuubi.engine.flink.FlinkEngineUtils._ import org.apache.kyuubi.engine.flink.WithFlinkSQLEngine import org.apache.kyuubi.engine.flink.result.Constants import org.apache.kyuubi.engine.flink.util.TestUserClassLoaderJar @@ -756,28 +755,34 @@ class FlinkOperationSuite extends WithFlinkSQLEngine with HiveJDBCTestHelper { } } - test("execute statement - select array") { + test("execute statement - select time") { withJdbcStatement() { statement => val resultSet = - statement.executeQuery("select array ['v1', 'v2', 'v3']") + statement.executeQuery( + "select time '00:00:03', time '00:00:05.123456789'") + val metaData = resultSet.getMetaData + assert(metaData.getColumnType(1) === java.sql.Types.VARCHAR) + assert(metaData.getColumnType(2) === java.sql.Types.VARCHAR) + assert(resultSet.next()) + assert(resultSet.getString(1) == "00:00:03") + assert(resultSet.getString(2) == "00:00:05.123") + } + } + + test("execute statement - select array") { + withJdbcStatement() { statement => + val resultSet = statement.executeQuery("select array ['v1', 'v2', 'v3']") val metaData = resultSet.getMetaData assert(metaData.getColumnType(1) === java.sql.Types.ARRAY) assert(resultSet.next()) - if (isFlinkVersionEqualTo("1.14")) { - val expected = """["v1","v2","v3"]""" - assert(resultSet.getObject(1).toString == expected) - } - if (isFlinkVersionAtLeast("1.15")) { - val expected = "[v1,v2,v3]" - assert(resultSet.getObject(1).toString == expected) - } + val expected = "[v1,v2,v3]" + assert(resultSet.getObject(1).toString == expected) } } test("execute statement - select map") { withJdbcStatement() { statement => - val resultSet = - statement.executeQuery("select map ['k1', 'v1', 'k2', 'v2']") + val resultSet = statement.executeQuery("select map ['k1', 'v1', 'k2', 'v2']") assert(resultSet.next()) assert(resultSet.getString(1) == "{k1=v1, k2=v2}") val metaData = resultSet.getMetaData @@ -787,17 +792,10 @@ class FlinkOperationSuite extends WithFlinkSQLEngine with HiveJDBCTestHelper { test("execute statement - select row") { withJdbcStatement() { statement => - val resultSet = - statement.executeQuery("select (1, '2', true)") + val resultSet = statement.executeQuery("select (1, '2', true)") assert(resultSet.next()) - if (isFlinkVersionEqualTo("1.14")) { - val expected = """{INT NOT NULL:1,CHAR(1) NOT NULL:"2",BOOLEAN NOT NULL:true}""" - assert(resultSet.getString(1) == expected) - } - if (isFlinkVersionAtLeast("1.15")) { - val expected = """{INT NOT NULL:1,CHAR(1) NOT NULL:2,BOOLEAN NOT NULL:true}""" - assert(resultSet.getString(1) == expected) - } + val expected = """{INT NOT NULL:1,CHAR(1) NOT NULL:2,BOOLEAN NOT NULL:true}""" + assert(resultSet.getString(1) == expected) val metaData = resultSet.getMetaData assert(metaData.getColumnType(1) === java.sql.Types.STRUCT) } @@ -807,25 +805,20 @@ class FlinkOperationSuite extends WithFlinkSQLEngine with HiveJDBCTestHelper { withJdbcStatement() { statement => val resultSet = statement.executeQuery("select encode('kyuubi', 'UTF-8')") assert(resultSet.next()) - if (isFlinkVersionEqualTo("1.14")) { - assert(resultSet.getString(1) == "kyuubi") - } - if (isFlinkVersionAtLeast("1.15")) { - // TODO: validate table results after FLINK-28882 is resolved - assert(resultSet.getString(1) == "k") - } + // TODO: validate table results after FLINK-28882 is resolved + assert(resultSet.getString(1) == "k") val metaData = resultSet.getMetaData assert(metaData.getColumnType(1) === java.sql.Types.BINARY) } } test("execute statement - select float") { - withJdbcStatement()({ statement => + withJdbcStatement() { statement => val resultSet = statement.executeQuery("SELECT cast(0.1 as float)") assert(resultSet.next()) assert(resultSet.getString(1) == "0.1") assert(resultSet.getFloat(1) == 0.1f) - }) + } } test("execute statement - select count") { @@ -876,20 +869,15 @@ class FlinkOperationSuite extends WithFlinkSQLEngine with HiveJDBCTestHelper { } test("execute statement - create/drop catalog") { - withJdbcStatement()({ statement => - val createResult = { + withJdbcStatement() { statement => + val createResult = statement.executeQuery("create catalog cat_a with ('type'='generic_in_memory')") - } - if (isFlinkVersionAtLeast("1.15")) { - assert(createResult.next()) - assert(createResult.getString(1) === "OK") - } + assert(createResult.next()) + assert(createResult.getString(1) === "OK") val dropResult = statement.executeQuery("drop catalog cat_a") - if (isFlinkVersionAtLeast("1.15")) { - assert(dropResult.next()) - assert(dropResult.getString(1) === "OK") - } - }) + assert(dropResult.next()) + assert(dropResult.getString(1) === "OK") + } } test("execute statement - set/get catalog") { @@ -903,36 +891,31 @@ class FlinkOperationSuite extends WithFlinkSQLEngine with HiveJDBCTestHelper { statement.getConnection.setCatalog("cat_a") val changedCatalog = statement.getConnection.getCatalog assert(changedCatalog == "cat_a") + statement.getConnection.setCatalog("default_catalog") assert(statement.execute("drop catalog cat_a")) } } } test("execute statement - create/alter/drop database") { - withJdbcStatement()({ statement => + withJdbcStatement() { statement => val createResult = statement.executeQuery("create database db_a") - if (isFlinkVersionAtLeast("1.15")) { - assert(createResult.next()) - assert(createResult.getString(1) === "OK") - } + assert(createResult.next()) + assert(createResult.getString(1) === "OK") val alterResult = statement.executeQuery("alter database db_a set ('k1' = 'v1')") - if (isFlinkVersionAtLeast("1.15")) { - assert(alterResult.next()) - assert(alterResult.getString(1) === "OK") - } + assert(alterResult.next()) + assert(alterResult.getString(1) === "OK") val dropResult = statement.executeQuery("drop database db_a") - if (isFlinkVersionAtLeast("1.15")) { - assert(dropResult.next()) - assert(dropResult.getString(1) === "OK") - } - }) + assert(dropResult.next()) + assert(dropResult.getString(1) === "OK") + } } test("execute statement - set/get database") { withSessionConf()( Map(ENGINE_OPERATION_CONVERT_CATALOG_DATABASE_ENABLED.key -> "true"))( Map.empty) { - withJdbcStatement()({ statement => + withJdbcStatement() { statement => statement.executeQuery("create database db_a") val schema = statement.getConnection.getSchema assert(schema == "default_database") @@ -940,54 +923,41 @@ class FlinkOperationSuite extends WithFlinkSQLEngine with HiveJDBCTestHelper { val changedSchema = statement.getConnection.getSchema assert(changedSchema == "db_a") assert(statement.execute("drop database db_a")) - }) + } } } test("execute statement - create/alter/drop table") { - withJdbcStatement()({ statement => - val createResult = { + withJdbcStatement() { statement => + val createResult = statement.executeQuery("create table tbl_a (a string) with ('connector' = 'blackhole')") - } - if (isFlinkVersionAtLeast("1.15")) { - assert(createResult.next()) - assert(createResult.getString(1) === "OK") - } + assert(createResult.next()) + assert(createResult.getString(1) === "OK") val alterResult = statement.executeQuery("alter table tbl_a rename to tbl_b") - if (isFlinkVersionAtLeast("1.15")) { - assert(alterResult.next()) - assert(alterResult.getString(1) === "OK") - } + assert(alterResult.next()) + assert(alterResult.getString(1) === "OK") val dropResult = statement.executeQuery("drop table tbl_b") - if (isFlinkVersionAtLeast("1.15")) { - assert(dropResult.next()) - assert(dropResult.getString(1) === "OK") - } - }) + assert(dropResult.next()) + assert(dropResult.getString(1) === "OK") + } } test("execute statement - create/alter/drop view") { - withMultipleConnectionJdbcStatement()({ statement => + withMultipleConnectionJdbcStatement() { statement => val createResult = statement.executeQuery("create view view_a as select 1") - if (isFlinkVersionAtLeast("1.15")) { - assert(createResult.next()) - assert(createResult.getString(1) === "OK") - } + assert(createResult.next()) + assert(createResult.getString(1) === "OK") val alterResult = statement.executeQuery("alter view view_a rename to view_b") - if (isFlinkVersionAtLeast("1.15")) { - assert(alterResult.next()) - assert(alterResult.getString(1) === "OK") - } + assert(alterResult.next()) + assert(alterResult.getString(1) === "OK") val dropResult = statement.executeQuery("drop view view_b") - if (isFlinkVersionAtLeast("1.15")) { - assert(dropResult.next()) - assert(dropResult.getString(1) === "OK") - } - }) + assert(dropResult.next()) + assert(dropResult.getString(1) === "OK") + } } test("execute statement - insert into") { - withMultipleConnectionJdbcStatement()({ statement => + withMultipleConnectionJdbcStatement() { statement => statement.executeQuery("create table tbl_a (a int) with ('connector' = 'blackhole')") val resultSet = statement.executeQuery("insert into tbl_a select 1") val metadata = resultSet.getMetaData @@ -995,11 +965,11 @@ class FlinkOperationSuite extends WithFlinkSQLEngine with HiveJDBCTestHelper { assert(metadata.getColumnType(1) == java.sql.Types.BIGINT) assert(resultSet.next()) assert(resultSet.getLong(1) == -1L) - }) + } } test("execute statement - set properties") { - withMultipleConnectionJdbcStatement()({ statement => + withMultipleConnectionJdbcStatement() { statement => val resultSet = statement.executeQuery("set table.dynamic-table-options.enabled = true") val metadata = resultSet.getMetaData assert(metadata.getColumnName(1) == "key") @@ -1007,21 +977,21 @@ class FlinkOperationSuite extends WithFlinkSQLEngine with HiveJDBCTestHelper { assert(resultSet.next()) assert(resultSet.getString(1) == "table.dynamic-table-options.enabled") assert(resultSet.getString(2) == "true") - }) + } } test("execute statement - show properties") { - withMultipleConnectionJdbcStatement()({ statement => + withMultipleConnectionJdbcStatement() { statement => val resultSet = statement.executeQuery("set") val metadata = resultSet.getMetaData assert(metadata.getColumnName(1) == "key") assert(metadata.getColumnName(2) == "value") assert(resultSet.next()) - }) + } } test("execute statement - reset property") { - withMultipleConnectionJdbcStatement()({ statement => + withMultipleConnectionJdbcStatement() { statement => statement.executeQuery("set pipeline.jars = my.jar") statement.executeQuery("reset pipeline.jars") val resultSet = statement.executeQuery("set") @@ -1035,7 +1005,7 @@ class FlinkOperationSuite extends WithFlinkSQLEngine with HiveJDBCTestHelper { } } assert(success) - }) + } } test("execute statement - select udf") { diff --git a/externals/kyuubi-hive-sql-engine/pom.xml b/externals/kyuubi-hive-sql-engine/pom.xml index 1dbc319471a..0319d3dd2f3 100644 --- a/externals/kyuubi-hive-sql-engine/pom.xml +++ b/externals/kyuubi-hive-sql-engine/pom.xml @@ -21,7 +21,7 @@ org.apache.kyuubi kyuubi-parent - 1.7.0-SNAPSHOT + 1.8.0-SNAPSHOT ../../pom.xml diff --git a/externals/kyuubi-hive-sql-engine/src/main/scala/org/apache/kyuubi/engine/hive/session/HiveSessionManager.scala b/externals/kyuubi-hive-sql-engine/src/main/scala/org/apache/kyuubi/engine/hive/session/HiveSessionManager.scala index dc807429c51..d09912770cc 100644 --- a/externals/kyuubi-hive-sql-engine/src/main/scala/org/apache/kyuubi/engine/hive/session/HiveSessionManager.scala +++ b/externals/kyuubi-hive-sql-engine/src/main/scala/org/apache/kyuubi/engine/hive/session/HiveSessionManager.scala @@ -28,6 +28,7 @@ import org.apache.hive.service.cli.session.{HiveSessionImplwithUGI => ImportedHi import org.apache.hive.service.rpc.thrift.TProtocolVersion import org.apache.kyuubi.config.KyuubiConf.ENGINE_SHARE_LEVEL +import org.apache.kyuubi.config.KyuubiReservedKeys.KYUUBI_SESSION_HANDLE_KEY import org.apache.kyuubi.engine.ShareLevel import org.apache.kyuubi.engine.hive.HiveSQLEngine import org.apache.kyuubi.engine.hive.operation.HiveOperationManager @@ -72,33 +73,38 @@ class HiveSessionManager(engine: HiveSQLEngine) extends SessionManager("HiveSess password: String, ipAddress: String, conf: Map[String, String]): Session = { - val sessionHandle = SessionHandle() - val hive = { - val sessionWithUGI = new ImportedHiveSessionImpl( - new ImportedSessionHandle(sessionHandle.toTSessionHandle, protocol), + conf.get(KYUUBI_SESSION_HANDLE_KEY).map(SessionHandle.fromUUID).flatMap( + getSessionOption).getOrElse { + val sessionHandle = + conf.get(KYUUBI_SESSION_HANDLE_KEY).map(SessionHandle.fromUUID).getOrElse(SessionHandle()) + val hive = { + val sessionWithUGI = new ImportedHiveSessionImpl( + new ImportedSessionHandle(sessionHandle.toTSessionHandle, protocol), + protocol, + user, + password, + HiveSQLEngine.hiveConf, + ipAddress, + null, + Seq(ipAddress).asJava) + val proxy = HiveSessionProxy.getProxy(sessionWithUGI, sessionWithUGI.getSessionUgi) + sessionWithUGI.setProxySession(proxy) + proxy + } + hive.setSessionManager(internalSessionManager) + hive.setOperationManager(internalSessionManager.getOperationManager) + operationLogRoot.foreach(dir => hive.setOperationLogSessionDir(new File(dir))) + new HiveSessionImpl( protocol, user, password, - HiveSQLEngine.hiveConf, ipAddress, - null, - Seq(ipAddress).asJava) - val proxy = HiveSessionProxy.getProxy(sessionWithUGI, sessionWithUGI.getSessionUgi) - sessionWithUGI.setProxySession(proxy) - proxy + conf, + this, + sessionHandle, + hive) } - hive.setSessionManager(internalSessionManager) - hive.setOperationManager(internalSessionManager.getOperationManager) - operationLogRoot.foreach(dir => hive.setOperationLogSessionDir(new File(dir))) - new HiveSessionImpl( - protocol, - user, - password, - ipAddress, - conf, - this, - sessionHandle, - hive) + } override def closeSession(sessionHandle: SessionHandle): Unit = { diff --git a/externals/kyuubi-jdbc-engine/pom.xml b/externals/kyuubi-jdbc-engine/pom.xml index 8853cec6421..4bcc4fb601f 100644 --- a/externals/kyuubi-jdbc-engine/pom.xml +++ b/externals/kyuubi-jdbc-engine/pom.xml @@ -21,7 +21,7 @@ org.apache.kyuubi kyuubi-parent - 1.7.0-SNAPSHOT + 1.8.0-SNAPSHOT ../../pom.xml diff --git a/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/session/JdbcSessionImpl.scala b/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/session/JdbcSessionImpl.scala index 63fb2dd0739..f8cd40412f0 100644 --- a/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/session/JdbcSessionImpl.scala +++ b/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/session/JdbcSessionImpl.scala @@ -23,8 +23,9 @@ import scala.util.{Failure, Success, Try} import org.apache.hive.service.rpc.thrift.{TGetInfoType, TGetInfoValue, TProtocolVersion} import org.apache.kyuubi.KyuubiSQLException +import org.apache.kyuubi.config.KyuubiReservedKeys.KYUUBI_SESSION_HANDLE_KEY import org.apache.kyuubi.engine.jdbc.connection.ConnectionProvider -import org.apache.kyuubi.session.{AbstractSession, SessionManager} +import org.apache.kyuubi.session.{AbstractSession, SessionHandle, SessionManager} class JdbcSessionImpl( protocol: TProtocolVersion, @@ -35,6 +36,9 @@ class JdbcSessionImpl( sessionManager: SessionManager) extends AbstractSession(protocol, user, password, ipAddress, conf, sessionManager) { + override val handle: SessionHandle = + conf.get(KYUUBI_SESSION_HANDLE_KEY).map(SessionHandle.fromUUID).getOrElse(SessionHandle()) + private[jdbc] var sessionConnection: Connection = _ private var databaseMetaData: DatabaseMetaData = _ diff --git a/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/session/JdbcSessionManager.scala b/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/session/JdbcSessionManager.scala index db8f60c3cae..09958e0507f 100644 --- a/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/session/JdbcSessionManager.scala +++ b/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/session/JdbcSessionManager.scala @@ -20,6 +20,7 @@ import org.apache.hive.service.rpc.thrift.TProtocolVersion import org.apache.kyuubi.config.KyuubiConf import org.apache.kyuubi.config.KyuubiConf.ENGINE_SHARE_LEVEL +import org.apache.kyuubi.config.KyuubiReservedKeys.KYUUBI_SESSION_HANDLE_KEY import org.apache.kyuubi.engine.ShareLevel import org.apache.kyuubi.engine.jdbc.JdbcSQLEngine import org.apache.kyuubi.engine.jdbc.operation.JdbcOperationManager @@ -46,7 +47,10 @@ class JdbcSessionManager(name: String) password: String, ipAddress: String, conf: Map[String, String]): Session = { - new JdbcSessionImpl(protocol, user, password, ipAddress, conf, this) + conf.get(KYUUBI_SESSION_HANDLE_KEY).map(SessionHandle.fromUUID).flatMap( + getSessionOption).getOrElse { + new JdbcSessionImpl(protocol, user, password, ipAddress, conf, this) + } } override def closeSession(sessionHandle: SessionHandle): Unit = { diff --git a/externals/kyuubi-spark-sql-engine/pom.xml b/externals/kyuubi-spark-sql-engine/pom.xml index 0ea3aaaba1c..5b227cb5e29 100644 --- a/externals/kyuubi-spark-sql-engine/pom.xml +++ b/externals/kyuubi-spark-sql-engine/pom.xml @@ -21,7 +21,7 @@ org.apache.kyuubi kyuubi-parent - 1.7.0-SNAPSHOT + 1.8.0-SNAPSHOT ../../pom.xml @@ -193,6 +193,12 @@ jetcd-launcher test + + + com.vladsch.flexmark + flexmark-all + test + diff --git a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/SparkTBinaryFrontendService.scala b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/SparkTBinaryFrontendService.scala index d4eaf3454a4..854a28e85a1 100644 --- a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/SparkTBinaryFrontendService.scala +++ b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/SparkTBinaryFrontendService.scala @@ -27,6 +27,7 @@ import org.apache.spark.SparkContext import org.apache.spark.kyuubi.SparkContextHelper import org.apache.kyuubi.{KyuubiSQLException, Logging} +import org.apache.kyuubi.config.KyuubiConf import org.apache.kyuubi.config.KyuubiReservedKeys._ import org.apache.kyuubi.ha.client.{EngineServiceDiscovery, ServiceDiscovery} import org.apache.kyuubi.service.{Serverable, Service, TBinaryFrontendService} @@ -94,7 +95,15 @@ class SparkTBinaryFrontendService( } override def attributes: Map[String, String] = { - Map(KYUUBI_ENGINE_ID -> KyuubiSparkUtil.engineId) + val extraAttributes = conf.get(KyuubiConf.ENGINE_SPARK_REGISTER_ATTRIBUTES).map { attr => + attr -> KyuubiSparkUtil.globalSparkContext.getConf.get(attr, "") + }.toMap + val attributes = extraAttributes ++ Map(KYUUBI_ENGINE_ID -> KyuubiSparkUtil.engineId) + // TODO Support Spark Web UI Enabled SSL + sc.uiWebUrl match { + case Some(url) => attributes ++ Map(KYUUBI_ENGINE_URL -> url.split("//").last) + case None => attributes + } } } diff --git a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/operation/ExecutePython.scala b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/operation/ExecutePython.scala index e48ff6e5b06..d2627fd99fd 100644 --- a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/operation/ExecutePython.scala +++ b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/operation/ExecutePython.scala @@ -88,9 +88,9 @@ class ExecutePython( val output = response.map(_.content.getOutput()).getOrElse("") val ename = response.map(_.content.getEname()).getOrElse("") val evalue = response.map(_.content.getEvalue()).getOrElse("") - val traceback = response.map(_.content.getTraceback()).getOrElse(Array.empty) + val traceback = response.map(_.content.getTraceback()).getOrElse(Seq.empty) iter = - new ArrayFetchIterator[Row](Array(Row(output, status, ename, evalue, Row(traceback: _*)))) + new ArrayFetchIterator[Row](Array(Row(output, status, ename, evalue, traceback))) setState(OperationState.FINISHED) } else { throw KyuubiSQLException(s"Interpret error:\n$statement\n $response") @@ -210,7 +210,7 @@ case class SessionPythonWorker( stdin.flush() val pythonResponse = Option(stdout.readLine()).map(ExecutePython.fromJson[PythonResponse](_)) // throw exception if internal python code fail - if (internal && pythonResponse.map(_.content.status) != Some(PythonResponse.OK_STATUS)) { + if (internal && !pythonResponse.map(_.content.status).contains(PythonResponse.OK_STATUS)) { throw KyuubiSQLException(s"Internal python code $code failure: $pythonResponse") } pythonResponse @@ -328,7 +328,7 @@ object ExecutePython extends Logging { } // for test - def defaultSparkHome(): String = { + def defaultSparkHome: String = { val homeDirFilter: FilenameFilter = (dir: File, name: String) => dir.isDirectory && name.contains("spark-") && !name.contains("-engine") // get from kyuubi-server/../externals/kyuubi-download/target @@ -418,7 +418,7 @@ case class PythonResponseContent( data: Map[String, String], ename: String, evalue: String, - traceback: Array[String], + traceback: Seq[String], status: String) { def getOutput(): String = { Option(data) @@ -431,7 +431,7 @@ case class PythonResponseContent( def getEvalue(): String = { Option(evalue).getOrElse("") } - def getTraceback(): Array[String] = { - Option(traceback).getOrElse(Array.empty) + def getTraceback(): Seq[String] = { + Option(traceback).getOrElse(Seq.empty) } } diff --git a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/operation/ExecuteScala.scala b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/operation/ExecuteScala.scala index 0f63dcc067f..ff686cca0d0 100644 --- a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/operation/ExecuteScala.scala +++ b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/operation/ExecuteScala.scala @@ -112,7 +112,7 @@ class ExecuteScala( new ArrayFetchIterator[Row](result.collect()) } else { val output = repl.getOutput - info("scala repl output:\n" + output) + debug("scala repl output:\n" + output) new ArrayFetchIterator[Row](Array(Row(output))) } } diff --git a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/operation/ExecuteStatement.scala b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/operation/ExecuteStatement.scala index 2cdc2b50083..b29d2ca9a7e 100644 --- a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/operation/ExecuteStatement.scala +++ b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/operation/ExecuteStatement.scala @@ -21,14 +21,16 @@ import java.util.concurrent.RejectedExecutionException import scala.collection.JavaConverters._ -import org.apache.spark.sql.{DataFrame, Row} +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.DataFrame +import org.apache.spark.sql.execution.SQLExecution import org.apache.spark.sql.kyuubi.SparkDatasetHelper import org.apache.spark.sql.types._ import org.apache.kyuubi.{KyuubiSQLException, Logging} import org.apache.kyuubi.config.KyuubiConf.OPERATION_RESULT_MAX_ROWS import org.apache.kyuubi.engine.spark.KyuubiSparkUtil._ -import org.apache.kyuubi.operation.{ArrayFetchIterator, IterableFetchIterator, OperationState} +import org.apache.kyuubi.operation.{ArrayFetchIterator, FetchIterator, IterableFetchIterator, OperationHandle, OperationState} import org.apache.kyuubi.operation.log.OperationLog import org.apache.kyuubi.session.Session @@ -37,7 +39,8 @@ class ExecuteStatement( override val statement: String, override val shouldRunAsync: Boolean, queryTimeout: Long, - incrementalCollect: Boolean) + incrementalCollect: Boolean, + override protected val handle: OperationHandle) extends SparkOperation(session) with Logging { private val operationLog: OperationLog = OperationLog.createOperationLog(session, getHandle) @@ -62,51 +65,26 @@ class ExecuteStatement( OperationLog.removeCurrentOperationLog() } - private def executeStatement(): Unit = withLocalProperties { + protected def incrementalCollectResult(resultDF: DataFrame): Iterator[Any] = { + resultDF.toLocalIterator().asScala + } + + protected def fullCollectResult(resultDF: DataFrame): Array[_] = { + resultDF.collect() + } + + protected def takeResult(resultDF: DataFrame, maxRows: Int): Array[_] = { + resultDF.take(maxRows) + } + + protected def executeStatement(): Unit = withLocalProperties { try { setState(OperationState.RUNNING) info(diagnostics) Thread.currentThread().setContextClassLoader(spark.sharedState.jarClassLoader) addOperationListener() result = spark.sql(statement) - - iter = - if (incrementalCollect) { - info("Execute in incremental collect mode") - if (arrowEnabled) { - new IterableFetchIterator[Array[Byte]](new Iterable[Array[Byte]] { - override def iterator: Iterator[Array[Byte]] = SparkDatasetHelper.toArrowBatchRdd( - convertComplexType(result)).toLocalIterator - }) - } else { - new IterableFetchIterator[Row](new Iterable[Row] { - override def iterator: Iterator[Row] = result.toLocalIterator().asScala - }) - } - } else { - val resultMaxRows = spark.conf.getOption(OPERATION_RESULT_MAX_ROWS.key).map(_.toInt) - .getOrElse(session.sessionManager.getConf.get(OPERATION_RESULT_MAX_ROWS)) - if (resultMaxRows <= 0) { - info("Execute in full collect mode") - if (arrowEnabled) { - new ArrayFetchIterator( - SparkDatasetHelper.toArrowBatchRdd( - convertComplexType(result)).collect()) - } else { - new ArrayFetchIterator(result.collect()) - } - } else { - info(s"Execute with max result rows[$resultMaxRows]") - if (arrowEnabled) { - // this will introduce shuffle and hurt performance - new ArrayFetchIterator( - SparkDatasetHelper.toArrowBatchRdd( - convertComplexType(result.limit(resultMaxRows))).collect()) - } else { - new ArrayFetchIterator(result.take(resultMaxRows)) - } - } - } + iter = collectAsIterator(result) setCompiledStateIfNeeded() setState(OperationState.FINISHED) } catch { @@ -164,17 +142,87 @@ class ExecuteStatement( } } - // TODO:(fchen) make this configurable - val kyuubiBeelineConvertToString = true - - def convertComplexType(df: DataFrame): DataFrame = { - if (kyuubiBeelineConvertToString) { - SparkDatasetHelper.convertTopLevelComplexTypeToHiveString(df) + override def getResultSetMetadataHints(): Seq[String] = + Seq( + s"__kyuubi_operation_result_format__=$resultFormat", + s"__kyuubi_operation_result_arrow_timestampAsString__=$timestampAsString") + + private def collectAsIterator(resultDF: DataFrame): FetchIterator[_] = { + val resultMaxRows = spark.conf.getOption(OPERATION_RESULT_MAX_ROWS.key).map(_.toInt) + .getOrElse(session.sessionManager.getConf.get(OPERATION_RESULT_MAX_ROWS)) + if (incrementalCollect) { + if (resultMaxRows > 0) { + warn(s"Ignore ${OPERATION_RESULT_MAX_ROWS.key} on incremental collect mode.") + } + info("Execute in incremental collect mode") + new IterableFetchIterator[Any](new Iterable[Any] { + override def iterator: Iterator[Any] = incrementalCollectResult(resultDF) + }) } else { - df + val internalArray = if (resultMaxRows <= 0) { + info("Execute in full collect mode") + fullCollectResult(resultDF) + } else { + info(s"Execute with max result rows[$resultMaxRows]") + takeResult(resultDF, resultMaxRows) + } + new ArrayFetchIterator(internalArray) } } +} + +class ArrowBasedExecuteStatement( + session: Session, + override val statement: String, + override val shouldRunAsync: Boolean, + queryTimeout: Long, + incrementalCollect: Boolean, + override protected val handle: OperationHandle) + extends ExecuteStatement( + session, + statement, + shouldRunAsync, + queryTimeout, + incrementalCollect, + handle) { + + override protected def incrementalCollectResult(resultDF: DataFrame): Iterator[Any] = { + collectAsArrow(convertComplexType(resultDF)) { rdd => + rdd.toLocalIterator + } + } + + override protected def fullCollectResult(resultDF: DataFrame): Array[_] = { + collectAsArrow(convertComplexType(resultDF)) { rdd => + rdd.collect() + } + } + + override protected def takeResult(resultDF: DataFrame, maxRows: Int): Array[_] = { + // this will introduce shuffle and hurt performance + val limitedResult = resultDF.limit(maxRows) + collectAsArrow(convertComplexType(limitedResult)) { rdd => + rdd.collect() + } + } + + /** + * refer to org.apache.spark.sql.Dataset#withAction(), assign a new execution id for arrow-based + * operation, so that we can track the arrow-based queries on the UI tab. + */ + private def collectAsArrow[T](df: DataFrame)(action: RDD[Array[Byte]] => T): T = { + SQLExecution.withNewExecutionId(df.queryExecution, Some("collectAsArrow")) { + df.queryExecution.executedPlan.resetMetrics() + action(SparkDatasetHelper.toArrowBatchRdd(df)) + } + } + + override protected def isArrowBasedOperation: Boolean = true + + override val resultFormat = "arrow" + + private def convertComplexType(df: DataFrame): DataFrame = { + SparkDatasetHelper.convertTopLevelComplexTypeToHiveString(df, timestampAsString) + } - override def getResultSetMetadataHints(): Seq[String] = - Seq(s"__kyuubi_operation_result_format__=$resultFormat") } diff --git a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/operation/GetTables.scala b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/operation/GetTables.scala index 4093c61c100..40642b825b9 100644 --- a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/operation/GetTables.scala +++ b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/operation/GetTables.scala @@ -19,6 +19,7 @@ package org.apache.kyuubi.engine.spark.operation import org.apache.spark.sql.types.StructType +import org.apache.kyuubi.config.KyuubiConf.OPERATION_GET_TABLES_IGNORE_TABLE_PROPERTIES import org.apache.kyuubi.engine.spark.shim.SparkCatalogShim import org.apache.kyuubi.operation.IterableFetchIterator import org.apache.kyuubi.operation.meta.ResultSetSchemaConstant._ @@ -32,6 +33,12 @@ class GetTables( tableTypes: Set[String]) extends SparkOperation(session) { + protected val ignoreTableProperties = + spark.conf.getOption(OPERATION_GET_TABLES_IGNORE_TABLE_PROPERTIES.key) match { + case Some(s) => s.toBoolean + case _ => session.sessionManager.getConf.get(OPERATION_GET_TABLES_IGNORE_TABLE_PROPERTIES) + } + override def statement: String = { super.statement + s" [catalog: $catalog," + @@ -68,7 +75,13 @@ class GetTables( val tablePattern = toJavaRegex(tableName) val sparkShim = SparkCatalogShim() val catalogTablesAndViews = - sparkShim.getCatalogTablesOrViews(spark, catalog, schemaPattern, tablePattern, tableTypes) + sparkShim.getCatalogTablesOrViews( + spark, + catalog, + schemaPattern, + tablePattern, + tableTypes, + ignoreTableProperties) val allTableAndViews = if (tableTypes.exists("VIEW".equalsIgnoreCase)) { diff --git a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/operation/SparkOperation.scala b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/operation/SparkOperation.scala index 842ff944f34..eb58407d47c 100644 --- a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/operation/SparkOperation.scala +++ b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/operation/SparkOperation.scala @@ -24,6 +24,7 @@ import org.apache.hive.service.rpc.thrift.{TGetResultSetMetadataResp, TProgressU import org.apache.spark.kyuubi.{SparkProgressMonitor, SQLOperationListener} import org.apache.spark.kyuubi.SparkUtilsHelper.redact import org.apache.spark.sql.{DataFrame, Row, SparkSession} +import org.apache.spark.sql.execution.SQLExecution import org.apache.spark.sql.types.StructType import org.apache.kyuubi.{KyuubiSQLException, Utils} @@ -135,27 +136,35 @@ abstract class SparkOperation(session: Session) spark.sparkContext.setLocalProperty protected def withLocalProperties[T](f: => T): T = { - try { - spark.sparkContext.setJobGroup(statementId, redactedStatement, forceCancel) - spark.sparkContext.setLocalProperty(KYUUBI_SESSION_USER_KEY, session.user) - spark.sparkContext.setLocalProperty(KYUUBI_STATEMENT_ID_KEY, statementId) - schedulerPool match { - case Some(pool) => - spark.sparkContext.setLocalProperty(SPARK_SCHEDULER_POOL_KEY, pool) - case None => - } - if (isSessionUserSignEnabled) { - setSessionUserSign() - } + SQLExecution.withSQLConfPropagated(spark) { + val originalSession = SparkSession.getActiveSession + try { + SparkSession.setActiveSession(spark) + spark.sparkContext.setJobGroup(statementId, redactedStatement, forceCancel) + spark.sparkContext.setLocalProperty(KYUUBI_SESSION_USER_KEY, session.user) + spark.sparkContext.setLocalProperty(KYUUBI_STATEMENT_ID_KEY, statementId) + schedulerPool match { + case Some(pool) => + spark.sparkContext.setLocalProperty(SPARK_SCHEDULER_POOL_KEY, pool) + case None => + } + if (isSessionUserSignEnabled) { + setSessionUserSign() + } - f - } finally { - spark.sparkContext.setLocalProperty(SPARK_SCHEDULER_POOL_KEY, null) - spark.sparkContext.setLocalProperty(KYUUBI_SESSION_USER_KEY, null) - spark.sparkContext.setLocalProperty(KYUUBI_STATEMENT_ID_KEY, null) - spark.sparkContext.clearJobGroup() - if (isSessionUserSignEnabled) { - clearSessionUserSign() + f + } finally { + spark.sparkContext.setLocalProperty(SPARK_SCHEDULER_POOL_KEY, null) + spark.sparkContext.setLocalProperty(KYUUBI_SESSION_USER_KEY, null) + spark.sparkContext.setLocalProperty(KYUUBI_STATEMENT_ID_KEY, null) + spark.sparkContext.clearJobGroup() + if (isSessionUserSignEnabled) { + clearSessionUserSign() + } + originalSession match { + case Some(session) => SparkSession.setActiveSession(session) + case None => SparkSession.clearActiveSession() + } } } } @@ -236,7 +245,7 @@ abstract class SparkOperation(session: Session) case FETCH_FIRST => iter.fetchAbsolute(0); } resultRowSet = - if (arrowEnabled) { + if (isArrowBasedOperation) { if (iter.hasNext) { val taken = iter.next().asInstanceOf[Array[Byte]] RowSet.toTRowSet(taken, getProtocolVersion) @@ -246,10 +255,9 @@ abstract class SparkOperation(session: Session) } else { val taken = iter.take(rowSetSize) RowSet.toTRowSet( - taken.toList.asInstanceOf[List[Row]], + taken.toSeq.asInstanceOf[Seq[Row]], resultSchema, - getProtocolVersion, - timeZone) + getProtocolVersion) } resultRowSet.setStartRowOffset(iter.getPosition) } catch onError(cancel = true) @@ -259,15 +267,12 @@ abstract class SparkOperation(session: Session) override def shouldRunAsync: Boolean = false - protected def arrowEnabled(): Boolean = { - resultFormat().equalsIgnoreCase("arrow") && - // TODO: (fchen) make all operation support arrow - getClass.getCanonicalName == classOf[ExecuteStatement].getCanonicalName - } + protected def isArrowBasedOperation: Boolean = false + + protected def resultFormat: String = "thrift" - protected def resultFormat(): String = { - // TODO: respect the config of the operation ExecuteStatement, if it was set. - spark.conf.get("kyuubi.operation.result.format", "thrift") + protected def timestampAsString: Boolean = { + spark.conf.get("kyuubi.operation.result.arrow.timestampAsString", "false").toBoolean } protected def setSessionUserSign(): Unit = { diff --git a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/operation/SparkSQLOperationManager.scala b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/operation/SparkSQLOperationManager.scala index 5c5ed0f9868..8fd58b33875 100644 --- a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/operation/SparkSQLOperationManager.scala +++ b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/operation/SparkSQLOperationManager.scala @@ -23,10 +23,11 @@ import scala.collection.JavaConverters._ import org.apache.kyuubi.KyuubiSQLException import org.apache.kyuubi.config.KyuubiConf._ +import org.apache.kyuubi.config.KyuubiReservedKeys.KYUUBI_OPERATION_HANDLE_KEY import org.apache.kyuubi.engine.spark.repl.KyuubiSparkILoop import org.apache.kyuubi.engine.spark.session.SparkSessionImpl import org.apache.kyuubi.engine.spark.shim.SparkCatalogShim -import org.apache.kyuubi.operation.{NoneMode, Operation, OperationManager, PlanOnlyMode} +import org.apache.kyuubi.operation.{NoneMode, Operation, OperationHandle, OperationManager, PlanOnlyMode} import org.apache.kyuubi.session.{Session, SessionHandle} class SparkSQLOperationManager private (name: String) extends OperationManager(name) { @@ -70,6 +71,8 @@ class SparkSQLOperationManager private (name: String) extends OperationManager(n val lang = OperationLanguages(confOverlay.getOrElse( OPERATION_LANGUAGE.key, spark.conf.get(OPERATION_LANGUAGE.key, operationLanguageDefault))) + val opHandle = confOverlay.get(KYUUBI_OPERATION_HANDLE_KEY).map( + OperationHandle.apply).getOrElse(OperationHandle()) val operation = lang match { case OperationLanguages.SQL => @@ -82,7 +85,26 @@ class SparkSQLOperationManager private (name: String) extends OperationManager(n case NoneMode => val incrementalCollect = spark.conf.getOption(OPERATION_INCREMENTAL_COLLECT.key) .map(_.toBoolean).getOrElse(operationIncrementalCollectDefault) - new ExecuteStatement(session, statement, runAsync, queryTimeout, incrementalCollect) + // TODO: respect the config of the operation ExecuteStatement, if it was set. + val resultFormat = spark.conf.get("kyuubi.operation.result.format", "thrift") + resultFormat.toLowerCase match { + case "arrow" => + new ArrowBasedExecuteStatement( + session, + statement, + runAsync, + queryTimeout, + incrementalCollect, + opHandle) + case _ => + new ExecuteStatement( + session, + statement, + runAsync, + queryTimeout, + incrementalCollect, + opHandle) + } case mode => new PlanOnlyStatement(session, statement, mode) } diff --git a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/schema/RowSet.scala b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/schema/RowSet.scala index 8cc88156ba5..4f935ce49f0 100644 --- a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/schema/RowSet.scala +++ b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/schema/RowSet.scala @@ -18,22 +18,24 @@ package org.apache.kyuubi.engine.spark.schema import java.nio.ByteBuffer -import java.nio.charset.StandardCharsets -import java.sql.Timestamp -import java.time._ -import java.util.Date import scala.collection.JavaConverters._ import org.apache.hive.service.rpc.thrift._ import org.apache.spark.sql.Row +import org.apache.spark.sql.execution.HiveResult import org.apache.spark.sql.types._ -import org.apache.kyuubi.engine.spark.schema.SchemaHelper.TIMESTAMP_NTZ import org.apache.kyuubi.util.RowSetUtils._ object RowSet { + def toHiveString(valueAndType: (Any, DataType), nested: Boolean = false): String = { + // compatible w/ Spark 3.1 and above + val timeFormatters = HiveResult.getTimeFormatters + HiveResult.toHiveString(valueAndType, nested, timeFormatters) + } + def toTRowSet( bytes: Array[Byte], protocolVersion: TProtocolVersion): TRowSet = { @@ -58,26 +60,25 @@ object RowSet { def toTRowSet( rows: Seq[Row], schema: StructType, - protocolVersion: TProtocolVersion, - timeZone: ZoneId): TRowSet = { + protocolVersion: TProtocolVersion): TRowSet = { if (protocolVersion.getValue < TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V6.getValue) { - toRowBasedSet(rows, schema, timeZone) + toRowBasedSet(rows, schema) } else { - toColumnBasedSet(rows, schema, timeZone) + toColumnBasedSet(rows, schema) } } - def toRowBasedSet(rows: Seq[Row], schema: StructType, timeZone: ZoneId): TRowSet = { - var i = 0 + def toRowBasedSet(rows: Seq[Row], schema: StructType): TRowSet = { val rowSize = rows.length val tRows = new java.util.ArrayList[TRow](rowSize) + var i = 0 while (i < rowSize) { val row = rows(i) val tRow = new TRow() var j = 0 val columnSize = row.length while (j < columnSize) { - val columnValue = toTColumnValue(j, row, schema, timeZone) + val columnValue = toTColumnValue(j, row, schema) tRow.addToColVals(columnValue) j += 1 } @@ -87,21 +88,21 @@ object RowSet { new TRowSet(0, tRows) } - def toColumnBasedSet(rows: Seq[Row], schema: StructType, timeZone: ZoneId): TRowSet = { + def toColumnBasedSet(rows: Seq[Row], schema: StructType): TRowSet = { val rowSize = rows.length val tRowSet = new TRowSet(0, new java.util.ArrayList[TRow](rowSize)) var i = 0 val columnSize = schema.length while (i < columnSize) { val field = schema(i) - val tColumn = toTColumn(rows, i, field.dataType, timeZone) + val tColumn = toTColumn(rows, i, field.dataType) tRowSet.addToColumns(tColumn) i += 1 } tRowSet } - private def toTColumn(rows: Seq[Row], ordinal: Int, typ: DataType, timeZone: ZoneId): TColumn = { + private def toTColumn(rows: Seq[Row], ordinal: Int, typ: DataType): TColumn = { val nulls = new java.util.BitSet() typ match { case BooleanType => @@ -151,13 +152,7 @@ object RowSet { while (i < rowSize) { val row = rows(i) nulls.set(i, row.isNullAt(ordinal)) - val value = - if (row.isNullAt(ordinal)) { - "" - } else { - toHiveString((row.get(ordinal), typ), timeZone) - } - values.add(value) + values.add(toHiveString(row.get(ordinal) -> typ)) i += 1 } TColumn.stringVal(new TStringColumn(values, nulls)) @@ -189,8 +184,7 @@ object RowSet { private def toTColumnValue( ordinal: Int, row: Row, - types: StructType, - timeZone: ZoneId): TColumnValue = { + types: StructType): TColumnValue = { types(ordinal).dataType match { case BooleanType => val boolValue = new TBoolValue @@ -238,69 +232,12 @@ object RowSet { case _ => val tStrValue = new TStringValue if (!row.isNullAt(ordinal)) { - tStrValue.setValue( - toHiveString((row.get(ordinal), types(ordinal).dataType), timeZone)) + tStrValue.setValue(toHiveString(row.get(ordinal) -> types(ordinal).dataType)) } TColumnValue.stringVal(tStrValue) } } - /** - * A simpler impl of Spark's toHiveString - */ - def toHiveString(dataWithType: (Any, DataType), timeZone: ZoneId): String = { - dataWithType match { - case (null, _) => - // Only match nulls in nested type values - "null" - - case (d: Date, DateType) => - formatDate(d) - - case (ld: LocalDate, DateType) => - formatLocalDate(ld) - - case (t: Timestamp, TimestampType) => - formatTimestamp(t, Option(timeZone)) - - case (t: LocalDateTime, ntz) if ntz.getClass.getSimpleName.equals(TIMESTAMP_NTZ) => - formatLocalDateTime(t) - - case (i: Instant, TimestampType) => - formatInstant(i, Option(timeZone)) - - case (bin: Array[Byte], BinaryType) => - new String(bin, StandardCharsets.UTF_8) - - case (decimal: java.math.BigDecimal, DecimalType()) => - decimal.toPlainString - - case (s: String, StringType) => - // Only match string in nested type values - "\"" + s + "\"" - - case (d: Duration, _) => toDayTimeIntervalString(d) - - case (p: Period, _) => toYearMonthIntervalString(p) - - case (seq: scala.collection.Seq[_], ArrayType(typ, _)) => - seq.map(v => (v, typ)).map(e => toHiveString(e, timeZone)).mkString("[", ",", "]") - - case (m: Map[_, _], MapType(kType, vType, _)) => - m.map { case (key, value) => - toHiveString((key, kType), timeZone) + ":" + toHiveString((value, vType), timeZone) - }.toSeq.sorted.mkString("{", ",", "}") - - case (struct: Row, StructType(fields)) => - struct.toSeq.zip(fields).map { case (v, t) => - s""""${t.name}":${toHiveString((v, t.dataType), timeZone)}""" - }.mkString("{", ",", "}") - - case (other, _) => - other.toString - } - } - private def toTColumn(data: Array[Byte]): TColumn = { val values = new java.util.ArrayList[ByteBuffer](1) values.add(ByteBuffer.wrap(data)) diff --git a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/session/SparkSQLSessionManager.scala b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/session/SparkSQLSessionManager.scala index 76c6a65050d..79f38ce35a4 100644 --- a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/session/SparkSQLSessionManager.scala +++ b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/session/SparkSQLSessionManager.scala @@ -20,10 +20,12 @@ package org.apache.kyuubi.engine.spark.session import java.util.concurrent.{ScheduledExecutorService, TimeUnit} import org.apache.hive.service.rpc.thrift.TProtocolVersion +import org.apache.spark.api.python.KyuubiPythonGatewayServer import org.apache.spark.sql.SparkSession import org.apache.kyuubi.KyuubiSQLException import org.apache.kyuubi.config.KyuubiConf._ +import org.apache.kyuubi.config.KyuubiReservedKeys.KYUUBI_SESSION_HANDLE_KEY import org.apache.kyuubi.engine.ShareLevel import org.apache.kyuubi.engine.ShareLevel._ import org.apache.kyuubi.engine.spark.{KyuubiSparkUtil, SparkSQLEngine} @@ -93,6 +95,7 @@ class SparkSQLSessionManager private (name: String, spark: SparkSession) override def stop(): Unit = { super.stop() + KyuubiPythonGatewayServer.shutdown() userIsolatedSparkSessionThread.foreach(_.shutdown()) } @@ -135,21 +138,24 @@ class SparkSQLSessionManager private (name: String, spark: SparkSession) password: String, ipAddress: String, conf: Map[String, String]): Session = { - val sparkSession = - try { - getOrNewSparkSession(user) - } catch { - case e: Exception => throw KyuubiSQLException(e) - } + conf.get(KYUUBI_SESSION_HANDLE_KEY).map(SessionHandle.fromUUID).flatMap( + getSessionOption).getOrElse { + val sparkSession = + try { + getOrNewSparkSession(user) + } catch { + case e: Exception => throw KyuubiSQLException(e) + } - new SparkSessionImpl( - protocol, - user, - password, - ipAddress, - conf, - this, - sparkSession) + new SparkSessionImpl( + protocol, + user, + password, + ipAddress, + conf, + this, + sparkSession) + } } override def closeSession(sessionHandle: SessionHandle): Unit = { @@ -164,7 +170,12 @@ class SparkSQLSessionManager private (name: String, spark: SparkSession) } } } - super.closeSession(sessionHandle) + try { + super.closeSession(sessionHandle) + } catch { + case e: KyuubiSQLException => + warn(s"Error closing session ${sessionHandle}", e) + } if (shareLevel == ShareLevel.CONNECTION) { info("Session stopped due to shared level is Connection.") stopSession() diff --git a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/session/SparkSessionImpl.scala b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/session/SparkSessionImpl.scala index 5bf1ec08472..78164ff5fab 100644 --- a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/session/SparkSessionImpl.scala +++ b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/session/SparkSessionImpl.scala @@ -21,13 +21,14 @@ import org.apache.hive.service.rpc.thrift.{TGetInfoType, TGetInfoValue, TProtoco import org.apache.spark.sql.{AnalysisException, SparkSession} import org.apache.kyuubi.KyuubiSQLException +import org.apache.kyuubi.config.KyuubiReservedKeys.KYUUBI_SESSION_HANDLE_KEY import org.apache.kyuubi.engine.spark.events.SessionEvent import org.apache.kyuubi.engine.spark.operation.SparkSQLOperationManager import org.apache.kyuubi.engine.spark.shim.SparkCatalogShim import org.apache.kyuubi.engine.spark.udf.KDFRegistry import org.apache.kyuubi.events.EventBus import org.apache.kyuubi.operation.{Operation, OperationHandle} -import org.apache.kyuubi.session.{AbstractSession, SessionManager} +import org.apache.kyuubi.session.{AbstractSession, SessionHandle, SessionManager} class SparkSessionImpl( protocol: TProtocolVersion, @@ -39,6 +40,9 @@ class SparkSessionImpl( val spark: SparkSession) extends AbstractSession(protocol, user, password, ipAddress, conf, sessionManager) { + override val handle: SessionHandle = + conf.get(KYUUBI_SESSION_HANDLE_KEY).map(SessionHandle.fromUUID).getOrElse(SessionHandle()) + private def setModifiableConfig(key: String, value: String): Unit = { try { spark.conf.set(key, value) diff --git a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/shim/CatalogShim_v2_4.scala b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/shim/CatalogShim_v2_4.scala index 5977cd415b0..ea72dd1563c 100644 --- a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/shim/CatalogShim_v2_4.scala +++ b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/shim/CatalogShim_v2_4.scala @@ -41,7 +41,7 @@ class CatalogShim_v2_4 extends SparkCatalogShim { catalogName: String, schemaPattern: String): Seq[Row] = { (spark.sessionState.catalog.listDatabases(schemaPattern) ++ - getGlobalTempViewManager(spark, schemaPattern)).map(Row(_, "")) + getGlobalTempViewManager(spark, schemaPattern)).map(Row(_, SparkCatalogShim.SESSION_CATALOG)) } def setCurrentDatabase(spark: SparkSession, databaseName: String): Unit = { @@ -64,7 +64,8 @@ class CatalogShim_v2_4 extends SparkCatalogShim { catalogName: String, schemaPattern: String, tablePattern: String, - tableTypes: Set[String]): Seq[Row] = { + tableTypes: Set[String], + ignoreTableProperties: Boolean): Seq[Row] = { val catalog = spark.sessionState.catalog val databases = catalog.listDatabases(schemaPattern) @@ -139,13 +140,7 @@ class CatalogShim_v2_4 extends SparkCatalogShim { databases.flatMap { db => val identifiers = catalog.listTables(db, tablePattern, includeLocalTempViews = true) catalog.getTablesByName(identifiers).flatMap { t => - val tableSchema = - if (t.provider.getOrElse("").equalsIgnoreCase("delta")) { - spark.table(t.identifier.table).schema - } else { - t.schema - } - tableSchema.zipWithIndex.filter(f => columnPattern.matcher(f._1.name).matches()) + t.schema.zipWithIndex.filter(f => columnPattern.matcher(f._1.name).matches()) .map { case (f, i) => toColumnResult(catalogName, t.database, t.identifier.table, f, i) } } } diff --git a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/shim/CatalogShim_v3_0.scala b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/shim/CatalogShim_v3_0.scala index d60f94ac755..27c524f3032 100644 --- a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/shim/CatalogShim_v3_0.scala +++ b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/shim/CatalogShim_v3_0.scala @@ -129,13 +129,12 @@ class CatalogShim_v3_0 extends CatalogShim_v2_4 { spark: SparkSession, catalogName: String, schemaPattern: String): Seq[Row] = { - val catalog = getCatalog(spark, catalogName) - var schemas = getSchemasWithPattern(catalog, schemaPattern) if (catalogName == SparkCatalogShim.SESSION_CATALOG) { - val viewMgr = getGlobalTempViewManager(spark, schemaPattern) - schemas = schemas ++ viewMgr + super.getSchemas(spark, catalogName, schemaPattern) + } else { + val catalog = getCatalog(spark, catalogName) + getSchemasWithPattern(catalog, schemaPattern).map(Row(_, catalog.name)) } - schemas.map(Row(_, catalog.name)) } override def setCurrentDatabase(spark: SparkSession, databaseName: String): Unit = { @@ -151,7 +150,8 @@ class CatalogShim_v3_0 extends CatalogShim_v2_4 { catalogName: String, schemaPattern: String, tablePattern: String, - tableTypes: Set[String]): Seq[Row] = { + tableTypes: Set[String], + ignoreTableProperties: Boolean = false): Seq[Row] = { val catalog = getCatalog(spark, catalogName) val namespaces = listNamespacesWithPattern(catalog, schemaPattern) catalog match { @@ -161,16 +161,17 @@ class CatalogShim_v3_0 extends CatalogShim_v2_4 { SESSION_CATALOG, schemaPattern, tablePattern, - tableTypes) + tableTypes, + ignoreTableProperties) case tc: TableCatalog => val tp = tablePattern.r.pattern val identifiers = namespaces.flatMap { ns => tc.listTables(ns).filter(i => tp.matcher(quoteIfNeeded(i.name())).matches()) } identifiers.map { ident => - val table = tc.loadTable(ident) // TODO: restore view type for session catalog - val comment = table.properties().getOrDefault(TableCatalog.PROP_COMMENT, "") + val comment = if (ignoreTableProperties) "" + else tc.loadTable(ident).properties().getOrDefault(TableCatalog.PROP_COMMENT, "") val schema = ident.namespace().map(quoteIfNeeded).mkString(".") val tableName = quoteIfNeeded(ident.name()) Row(catalog.name(), schema, tableName, "TABLE", comment, null, null, null, null, null) @@ -188,14 +189,6 @@ class CatalogShim_v3_0 extends CatalogShim_v2_4 { val catalog = getCatalog(spark, catalogName) catalog match { - case builtin if builtin.name() == SESSION_CATALOG => - super.getColumnsByCatalog( - spark, - SESSION_CATALOG, - schemaPattern, - tablePattern, - columnPattern) - case tc: TableCatalog => val namespaces = listNamespacesWithPattern(catalog, schemaPattern) val tp = tablePattern.r.pattern @@ -210,6 +203,14 @@ class CatalogShim_v3_0 extends CatalogShim_v2_4 { table.schema.zipWithIndex.filter(f => columnPattern.matcher(f._1.name).matches()) .map { case (f, i) => toColumnResult(tc.name(), namespace, tableName, f, i) } } + + case builtin if builtin.name() == SESSION_CATALOG => + super.getColumnsByCatalog( + spark, + SESSION_CATALOG, + schemaPattern, + tablePattern, + columnPattern) } } } diff --git a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/shim/SparkCatalogShim.scala b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/shim/SparkCatalogShim.scala index bc5792823f7..83c80652380 100644 --- a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/shim/SparkCatalogShim.scala +++ b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/shim/SparkCatalogShim.scala @@ -69,7 +69,8 @@ trait SparkCatalogShim extends Logging { catalogName: String, schemaPattern: String, tablePattern: String, - tableTypes: Set[String]): Seq[Row] + tableTypes: Set[String], + ignoreTableProperties: Boolean): Seq[Row] def getTempViews( spark: SparkSession, diff --git a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/spark/api/python/KyuubiPythonGatewayServer.scala b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/spark/api/python/KyuubiPythonGatewayServer.scala index 7e15ffe05a6..8cf8d685c86 100644 --- a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/spark/api/python/KyuubiPythonGatewayServer.scala +++ b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/spark/api/python/KyuubiPythonGatewayServer.scala @@ -30,10 +30,12 @@ object KyuubiPythonGatewayServer extends Logging { val CONNECTION_FILE_PATH = Utils.createTempDir() + "/connection.info" - def start(): Unit = { + private var gatewayServer: Py4JServer = _ + + def start(): Unit = synchronized { val sparkConf = new SparkConf() - val gatewayServer: Py4JServer = new Py4JServer(sparkConf) + gatewayServer = new Py4JServer(sparkConf) gatewayServer.start() val boundPort: Int = gatewayServer.getListeningPort @@ -65,4 +67,11 @@ object KyuubiPythonGatewayServer extends Logging { System.exit(1) } } + + def shutdown(): Unit = synchronized { + if (gatewayServer != null) { + logInfo("shutting down the python gateway server.") + gatewayServer.shutdown() + } + } } diff --git a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/spark/sql/kyuubi/SparkDatasetHelper.scala b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/spark/sql/kyuubi/SparkDatasetHelper.scala index 23f7df21310..1a542937338 100644 --- a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/spark/sql/kyuubi/SparkDatasetHelper.scala +++ b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/spark/sql/kyuubi/SparkDatasetHelper.scala @@ -17,12 +17,10 @@ package org.apache.spark.sql.kyuubi -import java.time.ZoneId - import org.apache.spark.rdd.RDD import org.apache.spark.sql.{DataFrame, Dataset, Row} import org.apache.spark.sql.functions._ -import org.apache.spark.sql.types.{ArrayType, DataType, MapType, StructField, StructType} +import org.apache.spark.sql.types._ import org.apache.kyuubi.engine.spark.schema.RowSet @@ -31,21 +29,24 @@ object SparkDatasetHelper { ds.toArrowBatchRdd } - def convertTopLevelComplexTypeToHiveString(df: DataFrame): DataFrame = { - val timeZone = ZoneId.of(df.sparkSession.sessionState.conf.sessionLocalTimeZone) + def convertTopLevelComplexTypeToHiveString( + df: DataFrame, + timestampAsString: Boolean): DataFrame = { val quotedCol = (name: String) => col(quoteIfNeeded(name)) - // an udf to call `RowSet.toHiveString` on complex types(struct/array/map). + // an udf to call `RowSet.toHiveString` on complex types(struct/array/map) and timestamp type. val toHiveStringUDF = udf[String, Row, String]((row, schemaDDL) => { val dt = DataType.fromDDL(schemaDDL) dt match { case StructType(Array(StructField(_, st: StructType, _, _))) => - RowSet.toHiveString((row, st), timeZone) + RowSet.toHiveString((row, st), nested = true) case StructType(Array(StructField(_, at: ArrayType, _, _))) => - RowSet.toHiveString((row.toSeq.head, at), timeZone) + RowSet.toHiveString((row.toSeq.head, at), nested = true) case StructType(Array(StructField(_, mt: MapType, _, _))) => - RowSet.toHiveString((row.toSeq.head, mt), timeZone) + RowSet.toHiveString((row.toSeq.head, mt), nested = true) + case StructType(Array(StructField(_, tt: TimestampType, _, _))) => + RowSet.toHiveString((row.toSeq.head, tt), nested = true) case _ => throw new UnsupportedOperationException } @@ -54,7 +55,9 @@ object SparkDatasetHelper { val cols = df.schema.map { case sf @ StructField(name, _: StructType, _, _) => toHiveStringUDF(quotedCol(name), lit(sf.toDDL)).as(name) - case sf @ StructField(name, (_: MapType | _: ArrayType), _, _) => + case sf @ StructField(name, _: MapType | _: ArrayType, _, _) => + toHiveStringUDF(struct(quotedCol(name)), lit(sf.toDDL)).as(name) + case sf @ StructField(name, _: TimestampType, _, _) if timestampAsString => toHiveStringUDF(struct(quotedCol(name)), lit(sf.toDDL)).as(name) case StructField(name, _, _, _) => quotedCol(name) } diff --git a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/spark/ui/EnginePage.scala b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/spark/ui/EnginePage.scala index 0aba0c7c588..a2a2931f411 100644 --- a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/spark/ui/EnginePage.scala +++ b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/spark/ui/EnginePage.scala @@ -84,6 +84,10 @@ case class EnginePage(parent: EngineTab) extends WebUIPage("") { Background execution pool threads active: {engine.backendService.sessionManager.getActiveCount} +
    • + Background execution pool work queue size: + {engine.backendService.sessionManager.getWorkQueueSize} +
    • }.getOrElse(Seq.empty) }
    diff --git a/externals/kyuubi-spark-sql-engine/src/test/scala/org/apache/kyuubi/engine/spark/SparkEngineRegisterSuite.scala b/externals/kyuubi-spark-sql-engine/src/test/scala/org/apache/kyuubi/engine/spark/SparkEngineRegisterSuite.scala new file mode 100644 index 00000000000..8c636af7612 --- /dev/null +++ b/externals/kyuubi-spark-sql-engine/src/test/scala/org/apache/kyuubi/engine/spark/SparkEngineRegisterSuite.scala @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.engine.spark + +import java.util.UUID + +import org.apache.kyuubi.config.KyuubiReservedKeys.{KYUUBI_ENGINE_ID, KYUUBI_ENGINE_URL} + +trait SparkEngineRegisterSuite extends WithDiscoverySparkSQLEngine { + + override def withKyuubiConf: Map[String, String] = + super.withKyuubiConf ++ Map("spark.ui.enabled" -> "true") + + override val namespace: String = s"/kyuubi/deregister_test/${UUID.randomUUID.toString}" + + test("Spark Engine Register Zookeeper with spark ui info") { + withDiscoveryClient(client => { + val info = client.getChildren(namespace).head.split(";") + assert(info.exists(_.startsWith(KYUUBI_ENGINE_ID))) + assert(info.exists(_.startsWith(KYUUBI_ENGINE_URL))) + }) + } +} + +class ZookeeperSparkEngineRegisterSuite extends SparkEngineRegisterSuite + with WithEmbeddedZookeeper { + + override def withKyuubiConf: Map[String, String] = + super.withKyuubiConf ++ zookeeperConf +} + +class EtcdSparkEngineRegisterSuite extends SparkEngineRegisterSuite + with WithEtcdCluster { + override def withKyuubiConf: Map[String, String] = super.withKyuubiConf ++ etcdConf +} diff --git a/externals/kyuubi-spark-sql-engine/src/test/scala/org/apache/kyuubi/engine/spark/operation/SparkArrowbasedOperationSuite.scala b/externals/kyuubi-spark-sql-engine/src/test/scala/org/apache/kyuubi/engine/spark/operation/SparkArrowbasedOperationSuite.scala index e464569147c..ae6237bb59c 100644 --- a/externals/kyuubi-spark-sql-engine/src/test/scala/org/apache/kyuubi/engine/spark/operation/SparkArrowbasedOperationSuite.scala +++ b/externals/kyuubi-spark-sql-engine/src/test/scala/org/apache/kyuubi/engine/spark/operation/SparkArrowbasedOperationSuite.scala @@ -19,8 +19,14 @@ package org.apache.kyuubi.engine.spark.operation import java.sql.Statement +import org.apache.spark.KyuubiSparkContextHelper +import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project} +import org.apache.spark.sql.execution.QueryExecution +import org.apache.spark.sql.util.QueryExecutionListener + import org.apache.kyuubi.config.KyuubiConf -import org.apache.kyuubi.engine.spark.WithSparkSQLEngine +import org.apache.kyuubi.engine.spark.{SparkSQLEngine, WithSparkSQLEngine} +import org.apache.kyuubi.engine.spark.session.SparkSessionImpl import org.apache.kyuubi.operation.SparkDataTypeTests class SparkArrowbasedOperationSuite extends WithSparkSQLEngine with SparkDataTypeTests { @@ -35,6 +41,13 @@ class SparkArrowbasedOperationSuite extends WithSparkSQLEngine with SparkDataTyp override def resultFormat: String = "arrow" + override def beforeEach(): Unit = { + super.beforeEach() + withJdbcStatement() { statement => + checkResultSetFormat(statement, "arrow") + } + } + test("detect resultSet format") { withJdbcStatement() { statement => checkResultSetFormat(statement, "arrow") @@ -43,7 +56,89 @@ class SparkArrowbasedOperationSuite extends WithSparkSQLEngine with SparkDataTyp } } - def checkResultSetFormat(statement: Statement, expectFormat: String): Unit = { + test("Spark session timezone format") { + withJdbcStatement() { statement => + def check(expect: String): Unit = { + val query = + """ + |SELECT + | from_utc_timestamp( + | from_unixtime( + | 1670404535000 / 1000, 'yyyy-MM-dd HH:mm:ss' + | ), + | 'GMT+08:00' + | ) + |""".stripMargin + val resultSet = statement.executeQuery(query) + assert(resultSet.next()) + assert(resultSet.getString(1) == expect) + } + + def setTimeZone(timeZone: String): Unit = { + val rs = statement.executeQuery(s"set spark.sql.session.timeZone=$timeZone") + assert(rs.next()) + } + + Seq("true", "false").foreach { timestampAsString => + statement.executeQuery( + s"set ${KyuubiConf.ARROW_BASED_ROWSET_TIMESTAMP_AS_STRING.key}=$timestampAsString") + checkArrowBasedRowSetTimestampAsString(statement, timestampAsString) + setTimeZone("UTC") + check("2022-12-07 17:15:35.0") + setTimeZone("GMT+8") + check("2022-12-08 01:15:35.0") + } + } + } + + test("assign a new execution id for arrow-based result") { + var plan: LogicalPlan = null + + val listener = new QueryExecutionListener { + override def onSuccess(funcName: String, qe: QueryExecution, durationNs: Long): Unit = { + plan = qe.analyzed + } + override def onFailure(funcName: String, qe: QueryExecution, exception: Exception): Unit = {} + } + withJdbcStatement() { statement => + // since all the new sessions have their owner listener bus, we should register the listener + // in the current session. + registerListener(listener) + + val result = statement.executeQuery("select 1 as c1") + assert(result.next()) + assert(result.getInt("c1") == 1) + } + KyuubiSparkContextHelper.waitListenerBus(spark) + unregisterListener(listener) + assert(plan.isInstanceOf[Project]) + } + + test("arrow-based query metrics") { + var queryExecution: QueryExecution = null + + val listener = new QueryExecutionListener { + override def onSuccess(funcName: String, qe: QueryExecution, durationNs: Long): Unit = { + queryExecution = qe + } + override def onFailure(funcName: String, qe: QueryExecution, exception: Exception): Unit = {} + } + withJdbcStatement() { statement => + registerListener(listener) + val result = statement.executeQuery("select 1 as c1") + assert(result.next()) + assert(result.getInt("c1") == 1) + } + + KyuubiSparkContextHelper.waitListenerBus(spark) + unregisterListener(listener) + + val metrics = queryExecution.executedPlan.collectLeaves().head.metrics + assert(metrics.contains("numOutputRows")) + assert(metrics("numOutputRows").value === 1) + } + + private def checkResultSetFormat(statement: Statement, expectFormat: String): Unit = { val query = s""" |SELECT '$${hivevar:${KyuubiConf.OPERATION_RESULT_FORMAT.key}}' AS col @@ -52,4 +147,34 @@ class SparkArrowbasedOperationSuite extends WithSparkSQLEngine with SparkDataTyp assert(resultSet.next()) assert(resultSet.getString("col") === expectFormat) } + + private def checkArrowBasedRowSetTimestampAsString( + statement: Statement, + expect: String): Unit = { + val query = + s""" + |SELECT '$${hivevar:${KyuubiConf.ARROW_BASED_ROWSET_TIMESTAMP_AS_STRING.key}}' AS col + |""".stripMargin + val resultSet = statement.executeQuery(query) + assert(resultSet.next()) + assert(resultSet.getString("col") === expect) + } + + private def registerListener(listener: QueryExecutionListener): Unit = { + // since all the new sessions have their owner listener bus, we should register the listener + // in the current session. + SparkSQLEngine.currentEngine.get + .backendService + .sessionManager + .allSessions() + .foreach(_.asInstanceOf[SparkSessionImpl].spark.listenerManager.register(listener)) + } + + private def unregisterListener(listener: QueryExecutionListener): Unit = { + SparkSQLEngine.currentEngine.get + .backendService + .sessionManager + .allSessions() + .foreach(_.asInstanceOf[SparkSessionImpl].spark.listenerManager.unregister(listener)) + } } diff --git a/externals/kyuubi-spark-sql-engine/src/test/scala/org/apache/kyuubi/engine/spark/operation/SparkOperationSuite.scala b/externals/kyuubi-spark-sql-engine/src/test/scala/org/apache/kyuubi/engine/spark/operation/SparkOperationSuite.scala index 30bbf8b77b4..af514ceb3c0 100644 --- a/externals/kyuubi-spark-sql-engine/src/test/scala/org/apache/kyuubi/engine/spark/operation/SparkOperationSuite.scala +++ b/externals/kyuubi-spark-sql-engine/src/test/scala/org/apache/kyuubi/engine/spark/operation/SparkOperationSuite.scala @@ -39,7 +39,6 @@ import org.apache.kyuubi.engine.spark.shim.SparkCatalogShim import org.apache.kyuubi.operation.{HiveMetadataTests, SparkQueryTests} import org.apache.kyuubi.operation.meta.ResultSetSchemaConstant._ import org.apache.kyuubi.util.KyuubiHadoopUtils -import org.apache.kyuubi.util.SparkVersionUtil.isSparkVersionAtLeast class SparkOperationSuite extends WithSparkSQLEngine with HiveMetadataTests with SparkQueryTests { @@ -93,12 +92,12 @@ class SparkOperationSuite extends WithSparkSQLEngine with HiveMetadataTests with .add("c17", "struct", nullable = true, "17") // since spark3.3.0 - if (SPARK_ENGINE_VERSION >= "3.3") { + if (SPARK_ENGINE_RUNTIME_VERSION >= "3.3") { schema = schema.add("c18", "interval day", nullable = true, "18") .add("c19", "interval year", nullable = true, "19") } // since spark3.4.0 - if (SPARK_ENGINE_VERSION >= "3.4") { + if (SPARK_ENGINE_RUNTIME_VERSION >= "3.4") { schema = schema.add("c20", "timestamp_ntz", nullable = true, "20") } @@ -511,7 +510,7 @@ class SparkOperationSuite extends WithSparkSQLEngine with HiveMetadataTests with val status = tOpenSessionResp.getStatus val errorMessage = status.getErrorMessage assert(status.getStatusCode === TStatusCode.ERROR_STATUS) - if (isSparkVersionAtLeast("3.4")) { + if (SPARK_ENGINE_RUNTIME_VERSION >= "3.4") { assert(errorMessage.contains("[SCHEMA_NOT_FOUND]")) assert(errorMessage.contains(s"The schema `$dbName` cannot be found.")) } else { diff --git a/externals/kyuubi-spark-sql-engine/src/test/scala/org/apache/kyuubi/engine/spark/schema/RowSetSuite.scala b/externals/kyuubi-spark-sql-engine/src/test/scala/org/apache/kyuubi/engine/spark/schema/RowSetSuite.scala index 803eea3e6cd..5d2ba4a0d11 100644 --- a/externals/kyuubi-spark-sql-engine/src/test/scala/org/apache/kyuubi/engine/spark/schema/RowSetSuite.scala +++ b/externals/kyuubi-spark-sql-engine/src/test/scala/org/apache/kyuubi/engine/spark/schema/RowSetSuite.scala @@ -20,7 +20,7 @@ package org.apache.kyuubi.engine.spark.schema import java.nio.ByteBuffer import java.nio.charset.StandardCharsets import java.sql.{Date, Timestamp} -import java.time.{Instant, LocalDate, ZoneId} +import java.time.{Instant, LocalDate} import scala.collection.JavaConverters._ @@ -30,7 +30,6 @@ import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.CalendarInterval import org.apache.kyuubi.KyuubiFunSuite -import org.apache.kyuubi.engine.spark.schema.RowSet.toHiveString class RowSetSuite extends KyuubiFunSuite { @@ -97,10 +96,9 @@ class RowSetSuite extends KyuubiFunSuite { .add("q", "timestamp") private val rows: Seq[Row] = (0 to 10).map(genRow) ++ Seq(Row.fromSeq(Seq.fill(17)(null))) - private val zoneId: ZoneId = ZoneId.systemDefault() test("column based set") { - val tRowSet = RowSet.toColumnBasedSet(rows, schema, zoneId) + val tRowSet = RowSet.toColumnBasedSet(rows, schema) assert(tRowSet.getColumns.size() === schema.size) assert(tRowSet.getRowsSize === 0) @@ -159,22 +157,22 @@ class RowSetSuite extends KyuubiFunSuite { val decCol = cols.next().getStringVal decCol.getValues.asScala.zipWithIndex.foreach { - case (b, 11) => assert(b.isEmpty) + case (b, 11) => assert(b === "NULL") case (b, i) => assert(b === s"$i.$i") } val dateCol = cols.next().getStringVal dateCol.getValues.asScala.zipWithIndex.foreach { - case (b, 11) => assert(b.isEmpty) + case (b, 11) => assert(b === "NULL") case (b, i) => - assert(b === toHiveString((Date.valueOf(s"2018-11-${i + 1}"), DateType), zoneId)) + assert(b === RowSet.toHiveString(Date.valueOf(s"2018-11-${i + 1}") -> DateType)) } val tsCol = cols.next().getStringVal tsCol.getValues.asScala.zipWithIndex.foreach { - case (b, 11) => assert(b.isEmpty) + case (b, 11) => assert(b === "NULL") case (b, i) => assert(b === - toHiveString((Timestamp.valueOf(s"2018-11-17 13:33:33.$i"), TimestampType), zoneId)) + RowSet.toHiveString(Timestamp.valueOf(s"2018-11-17 13:33:33.$i") -> TimestampType)) } val binCol = cols.next().getBinaryVal @@ -185,29 +183,27 @@ class RowSetSuite extends KyuubiFunSuite { val arrCol = cols.next().getStringVal arrCol.getValues.asScala.zipWithIndex.foreach { - case (b, 11) => assert(b === "") - case (b, i) => assert(b === toHiveString( - (Array.fill(i)(java.lang.Double.valueOf(s"$i.$i")).toSeq, ArrayType(DoubleType)), - zoneId)) + case (b, 11) => assert(b === "NULL") + case (b, i) => assert(b === RowSet.toHiveString( + Array.fill(i)(java.lang.Double.valueOf(s"$i.$i")).toSeq -> ArrayType(DoubleType))) } val mapCol = cols.next().getStringVal mapCol.getValues.asScala.zipWithIndex.foreach { - case (b, 11) => assert(b === "") - case (b, i) => assert(b === toHiveString( - (Map(i -> java.lang.Double.valueOf(s"$i.$i")), MapType(IntegerType, DoubleType)), - zoneId)) + case (b, 11) => assert(b === "NULL") + case (b, i) => assert(b === RowSet.toHiveString( + Map(i -> java.lang.Double.valueOf(s"$i.$i")) -> MapType(IntegerType, DoubleType))) } val intervalCol = cols.next().getStringVal intervalCol.getValues.asScala.zipWithIndex.foreach { - case (b, 11) => assert(b === "") + case (b, 11) => assert(b === "NULL") case (b, i) => assert(b === new CalendarInterval(i, i, i).toString) } } test("row based set") { - val tRowSet = RowSet.toRowBasedSet(rows, schema, zoneId) + val tRowSet = RowSet.toRowBasedSet(rows, schema) assert(tRowSet.getColumnCount === 0) assert(tRowSet.getRowsSize === rows.size) val iter = tRowSet.getRowsIterator @@ -237,7 +233,7 @@ class RowSetSuite extends KyuubiFunSuite { assert(r6.get(9).getStringVal.getValue === "2018-11-06") val r7 = iter.next().getColVals - assert(r7.get(10).getStringVal.getValue === "2018-11-17 13:33:33.600") + assert(r7.get(10).getStringVal.getValue === "2018-11-17 13:33:33.6") assert(r7.get(11).getStringVal.getValue === new String( Array.fill[Byte](6)(6.toByte), StandardCharsets.UTF_8)) @@ -245,7 +241,7 @@ class RowSetSuite extends KyuubiFunSuite { val r8 = iter.next().getColVals assert(r8.get(12).getStringVal.getValue === Array.fill(7)(7.7d).mkString("[", ",", "]")) assert(r8.get(13).getStringVal.getValue === - toHiveString((Map(7 -> 7.7d), MapType(IntegerType, DoubleType)), zoneId)) + RowSet.toHiveString(Map(7 -> 7.7d) -> MapType(IntegerType, DoubleType))) val r9 = iter.next().getColVals assert(r9.get(14).getStringVal.getValue === new CalendarInterval(8, 8, 8).toString) @@ -253,7 +249,7 @@ class RowSetSuite extends KyuubiFunSuite { test("to row set") { TProtocolVersion.values().foreach { proto => - val set = RowSet.toTRowSet(rows, schema, proto, zoneId) + val set = RowSet.toTRowSet(rows, schema, proto) if (proto.getValue < TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V6.getValue) { assert(!set.isSetColumns, proto.toString) assert(set.isSetRows, proto.toString) diff --git a/externals/kyuubi-spark-sql-engine/src/test/scala/org/apache/kyuubi/engine/spark/udf/KyuubiDefinedFunctionSuite.scala b/externals/kyuubi-spark-sql-engine/src/test/scala/org/apache/kyuubi/engine/spark/udf/KyuubiDefinedFunctionSuite.scala index 4d38bc363b3..f355e1e6b51 100644 --- a/externals/kyuubi-spark-sql-engine/src/test/scala/org/apache/kyuubi/engine/spark/udf/KyuubiDefinedFunctionSuite.scala +++ b/externals/kyuubi-spark-sql-engine/src/test/scala/org/apache/kyuubi/engine/spark/udf/KyuubiDefinedFunctionSuite.scala @@ -19,9 +19,7 @@ package org.apache.kyuubi.engine.spark.udf import java.nio.file.Paths -import scala.collection.mutable.ArrayBuffer - -import org.apache.kyuubi.{KyuubiFunSuite, TestUtils, Utils} +import org.apache.kyuubi.{KyuubiFunSuite, MarkdownBuilder, MarkdownUtils, Utils} // scalastyle:off line.size.limit /** @@ -30,12 +28,12 @@ import org.apache.kyuubi.{KyuubiFunSuite, TestUtils, Utils} * * To run the entire test suite: * {{{ - * build/mvn clean install -Pflink-provided,spark-provided,hive-provided -DwildcardSuites=org.apache.kyuubi.engine.spark.udf.KyuubiDefinedFunctionSuite + * build/mvn clean test -pl externals/kyuubi-spark-sql-engine -am -Pflink-provided,spark-provided,hive-provided -DwildcardSuites=org.apache.kyuubi.engine.spark.udf.KyuubiDefinedFunctionSuite * }}} * * To re-generate golden files for entire suite, run: * {{{ - * KYUUBI_UPDATE=1 build/mvn clean install -Pflink-provided,spark-provided,hive-provided -DwildcardSuites=org.apache.kyuubi.engine.spark.udf.KyuubiDefinedFunctionSuite + * KYUUBI_UPDATE=1 build/mvn clean test -pl externals/kyuubi-spark-sql-engine -am -Pflink-provided,spark-provided,hive-provided -DwildcardSuites=org.apache.kyuubi.engine.spark.udf.KyuubiDefinedFunctionSuite * }}} */ // scalastyle:on line.size.limit @@ -48,40 +46,26 @@ class KyuubiDefinedFunctionSuite extends KyuubiFunSuite { .toAbsolutePath test("verify or update kyuubi spark sql functions") { - val newOutput = new ArrayBuffer[String]() - newOutput += "" - newOutput += "" - newOutput += "" - newOutput += "" - newOutput += "# Auxiliary SQL Functions" - newOutput += "" - newOutput += "Kyuubi provides several auxiliary SQL functions as supplement to Spark's " + - "[Built-in Functions](https://spark.apache.org/docs/latest/api/sql/index.html#" + - "built-in-functions)" - newOutput += "" - newOutput += "Name | Description | Return Type | Since" - newOutput += "--- | --- | --- | ---" - KDFRegistry + val builder = MarkdownBuilder(licenced = true, getClass.getName) + + builder + .line("# Auxiliary SQL Functions") + .line("""Kyuubi provides several auxiliary SQL functions as supplement to Spark's + | [Built-in Functions](https://spark.apache.org/docs/latest/api/sql/index.html# + |built-in-functions)""") + .lines(""" + | Name | Description | Return Type | Since + | --- | --- | --- | --- + | + |""") KDFRegistry.registeredFunctions.foreach { func => - newOutput += s"${func.name} | ${func.description} | ${func.returnType} | ${func.since}" + builder.line(s"${func.name} | ${func.description} | ${func.returnType} | ${func.since}") } - newOutput += "" - TestUtils.verifyOutput(markdown, newOutput, getClass.getCanonicalName) + + MarkdownUtils.verifyOutput( + markdown, + builder, + getClass.getCanonicalName, + "externals/kyuubi-spark-sql-engine") } } diff --git a/externals/kyuubi-spark-sql-engine/src/test/scala/org/apache/spark/KyuubiSparkContextHelper.scala b/externals/kyuubi-spark-sql-engine/src/test/scala/org/apache/spark/KyuubiSparkContextHelper.scala new file mode 100644 index 00000000000..8293123ead7 --- /dev/null +++ b/externals/kyuubi-spark-sql-engine/src/test/scala/org/apache/spark/KyuubiSparkContextHelper.scala @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark + +import org.apache.spark.sql.SparkSession + +/** + * A place to invoke non-public APIs of [[SparkContext]], for test only. + */ +object KyuubiSparkContextHelper { + + def waitListenerBus(spark: SparkSession): Unit = { + spark.sparkContext.listenerBus.waitUntilEmpty() + } +} diff --git a/externals/kyuubi-trino-engine/pom.xml b/externals/kyuubi-trino-engine/pom.xml index 7e2f67370e6..7aea8f33a6f 100644 --- a/externals/kyuubi-trino-engine/pom.xml +++ b/externals/kyuubi-trino-engine/pom.xml @@ -21,7 +21,7 @@ org.apache.kyuubi kyuubi-parent - 1.7.0-SNAPSHOT + 1.8.0-SNAPSHOT ../../pom.xml diff --git a/externals/kyuubi-trino-engine/src/main/scala/org/apache/kyuubi/engine/trino/session/TrinoSessionImpl.scala b/externals/kyuubi-trino-engine/src/main/scala/org/apache/kyuubi/engine/trino/session/TrinoSessionImpl.scala index a19d74d586c..81f973b1b5e 100644 --- a/externals/kyuubi-trino-engine/src/main/scala/org/apache/kyuubi/engine/trino/session/TrinoSessionImpl.scala +++ b/externals/kyuubi-trino-engine/src/main/scala/org/apache/kyuubi/engine/trino/session/TrinoSessionImpl.scala @@ -30,11 +30,12 @@ import org.apache.hive.service.rpc.thrift.{TGetInfoType, TGetInfoValue, TProtoco import org.apache.kyuubi.KyuubiSQLException import org.apache.kyuubi.Utils.currentUser import org.apache.kyuubi.config.{KyuubiConf, KyuubiReservedKeys} +import org.apache.kyuubi.config.KyuubiReservedKeys.KYUUBI_SESSION_HANDLE_KEY import org.apache.kyuubi.engine.trino.{TrinoConf, TrinoContext, TrinoStatement} import org.apache.kyuubi.engine.trino.event.TrinoSessionEvent import org.apache.kyuubi.events.EventBus import org.apache.kyuubi.operation.{Operation, OperationHandle} -import org.apache.kyuubi.session.{AbstractSession, SessionManager} +import org.apache.kyuubi.session.{AbstractSession, SessionHandle, SessionManager} class TrinoSessionImpl( protocol: TProtocolVersion, @@ -45,6 +46,9 @@ class TrinoSessionImpl( sessionManager: SessionManager) extends AbstractSession(protocol, user, password, ipAddress, conf, sessionManager) { + override val handle: SessionHandle = + conf.get(KYUUBI_SESSION_HANDLE_KEY).map(SessionHandle.fromUUID).getOrElse(SessionHandle()) + var trinoContext: TrinoContext = _ private var clientSession: ClientSession = _ private var catalogName: String = null diff --git a/externals/kyuubi-trino-engine/src/main/scala/org/apache/kyuubi/engine/trino/session/TrinoSessionManager.scala b/externals/kyuubi-trino-engine/src/main/scala/org/apache/kyuubi/engine/trino/session/TrinoSessionManager.scala index 6d56d5c0541..e18b8f75817 100644 --- a/externals/kyuubi-trino-engine/src/main/scala/org/apache/kyuubi/engine/trino/session/TrinoSessionManager.scala +++ b/externals/kyuubi-trino-engine/src/main/scala/org/apache/kyuubi/engine/trino/session/TrinoSessionManager.scala @@ -20,6 +20,7 @@ package org.apache.kyuubi.engine.trino.session import org.apache.hive.service.rpc.thrift.TProtocolVersion import org.apache.kyuubi.config.KyuubiConf.ENGINE_SHARE_LEVEL +import org.apache.kyuubi.config.KyuubiReservedKeys.KYUUBI_SESSION_HANDLE_KEY import org.apache.kyuubi.engine.ShareLevel import org.apache.kyuubi.engine.trino.TrinoSqlEngine import org.apache.kyuubi.engine.trino.operation.TrinoOperationManager @@ -36,7 +37,10 @@ class TrinoSessionManager password: String, ipAddress: String, conf: Map[String, String]): Session = { - new TrinoSessionImpl(protocol, user, password, ipAddress, conf, this) + conf.get(KYUUBI_SESSION_HANDLE_KEY).map(SessionHandle.fromUUID).flatMap( + getSessionOption).getOrElse { + new TrinoSessionImpl(protocol, user, password, ipAddress, conf, this) + } } override def closeSession(sessionHandle: SessionHandle): Unit = { diff --git a/integration-tests/kyuubi-flink-it/pom.xml b/integration-tests/kyuubi-flink-it/pom.xml index 7f9a84a85bc..c6a55c62cb6 100644 --- a/integration-tests/kyuubi-flink-it/pom.xml +++ b/integration-tests/kyuubi-flink-it/pom.xml @@ -21,7 +21,7 @@ org.apache.kyuubi integration-tests - 1.7.0-SNAPSHOT + 1.8.0-SNAPSHOT ../pom.xml @@ -75,7 +75,7 @@ org.apache.flink - flink-table-runtime${flink.module.scala.suffix} + flink-table-runtime test diff --git a/integration-tests/kyuubi-hive-it/pom.xml b/integration-tests/kyuubi-hive-it/pom.xml index 8b9813a2be0..ff9a6b35ea6 100644 --- a/integration-tests/kyuubi-hive-it/pom.xml +++ b/integration-tests/kyuubi-hive-it/pom.xml @@ -21,7 +21,7 @@ org.apache.kyuubi integration-tests - 1.7.0-SNAPSHOT + 1.8.0-SNAPSHOT ../pom.xml diff --git a/integration-tests/kyuubi-jdbc-it/pom.xml b/integration-tests/kyuubi-jdbc-it/pom.xml index 0aef12fb3f3..2d95de78ed8 100644 --- a/integration-tests/kyuubi-jdbc-it/pom.xml +++ b/integration-tests/kyuubi-jdbc-it/pom.xml @@ -21,7 +21,7 @@ org.apache.kyuubi integration-tests - 1.7.0-SNAPSHOT + 1.8.0-SNAPSHOT ../pom.xml diff --git a/integration-tests/kyuubi-kubernetes-it/pom.xml b/integration-tests/kyuubi-kubernetes-it/pom.xml index cb04e73c1d5..a796ccab59a 100644 --- a/integration-tests/kyuubi-kubernetes-it/pom.xml +++ b/integration-tests/kyuubi-kubernetes-it/pom.xml @@ -15,17 +15,15 @@ ~ See the License for the specific language governing permissions and ~ limitations under the License. --> - - + 4.0.0 org.apache.kyuubi integration-tests - 1.7.0-SNAPSHOT + 1.8.0-SNAPSHOT ../pom.xml - 4.0.0 kubernetes-integration-tests_2.12 Kyuubi Test Kubernetes IT @@ -62,12 +60,6 @@ test - - io.fabric8 - kubernetes-client - test - - org.apache.hadoop hadoop-client-minicluster diff --git a/integration-tests/kyuubi-kubernetes-it/src/test/scala/org/apache/kyuubi/kubernetes/test/MiniKube.scala b/integration-tests/kyuubi-kubernetes-it/src/test/scala/org/apache/kyuubi/kubernetes/test/MiniKube.scala index cd373873a6a..f4cd557bb0f 100644 --- a/integration-tests/kyuubi-kubernetes-it/src/test/scala/org/apache/kyuubi/kubernetes/test/MiniKube.scala +++ b/integration-tests/kyuubi-kubernetes-it/src/test/scala/org/apache/kyuubi/kubernetes/test/MiniKube.scala @@ -17,7 +17,11 @@ package org.apache.kyuubi.kubernetes.test -import io.fabric8.kubernetes.client.{Config, DefaultKubernetesClient} +import io.fabric8.kubernetes.client.{Config, KubernetesClient, KubernetesClientBuilder} +import io.fabric8.kubernetes.client.okhttp.OkHttpClientFactory +import okhttp3.{Dispatcher, OkHttpClient} + +import org.apache.kyuubi.util.ThreadUtils /** * This code copied from Aapache Spark @@ -44,7 +48,7 @@ object MiniKube { executeMinikube(true, "ip").head } - def getKubernetesClient: DefaultKubernetesClient = { + def getKubernetesClient: KubernetesClient = { // only the three-part version number is matched (the optional suffix like "-beta.0" is dropped) val versionArrayOpt = "\\d+\\.\\d+\\.\\d+".r .findFirstIn(minikubeVersionString.split(VERSION_PREFIX)(1)) @@ -65,7 +69,18 @@ object MiniKube { "For minikube version a three-part version number is expected (the optional " + "non-numeric suffix is intentionally dropped)") } + // https://github.com/fabric8io/kubernetes-client/issues/3547 + val dispatcher = new Dispatcher( + ThreadUtils.newDaemonCachedThreadPool("kubernetes-dispatcher")) + val factoryWithCustomDispatcher = new OkHttpClientFactory() { + override protected def additionalConfig(builder: OkHttpClient.Builder): Unit = { + builder.dispatcher(dispatcher) + } + } - new DefaultKubernetesClient(Config.autoConfigure("minikube")) + new KubernetesClientBuilder() + .withConfig(Config.autoConfigure("minikube")) + .withHttpClientFactory(factoryWithCustomDispatcher) + .build() } } diff --git a/integration-tests/kyuubi-kubernetes-it/src/test/scala/org/apache/kyuubi/kubernetes/test/WithKyuubiServerOnKubernetes.scala b/integration-tests/kyuubi-kubernetes-it/src/test/scala/org/apache/kyuubi/kubernetes/test/WithKyuubiServerOnKubernetes.scala index ed9cbce09fe..595fdd4314e 100644 --- a/integration-tests/kyuubi-kubernetes-it/src/test/scala/org/apache/kyuubi/kubernetes/test/WithKyuubiServerOnKubernetes.scala +++ b/integration-tests/kyuubi-kubernetes-it/src/test/scala/org/apache/kyuubi/kubernetes/test/WithKyuubiServerOnKubernetes.scala @@ -18,14 +18,14 @@ package org.apache.kyuubi.kubernetes.test import io.fabric8.kubernetes.api.model.Pod -import io.fabric8.kubernetes.client.DefaultKubernetesClient +import io.fabric8.kubernetes.client.KubernetesClient import org.apache.kyuubi.KyuubiFunSuite trait WithKyuubiServerOnKubernetes extends KyuubiFunSuite { protected def connectionConf: Map[String, String] = Map.empty - lazy val miniKubernetesClient: DefaultKubernetesClient = MiniKube.getKubernetesClient + lazy val miniKubernetesClient: KubernetesClient = MiniKube.getKubernetesClient lazy val kyuubiPod: Pod = miniKubernetesClient.pods().withName("kyuubi-test").get() lazy val kyuubiServerIp: String = kyuubiPod.getStatus.getPodIP lazy val miniKubeIp: String = MiniKube.getIp diff --git a/integration-tests/kyuubi-kubernetes-it/src/test/scala/org/apache/kyuubi/kubernetes/test/deployment/KyuubiOnKubernetesTestsSuite.scala b/integration-tests/kyuubi-kubernetes-it/src/test/scala/org/apache/kyuubi/kubernetes/test/deployment/KyuubiOnKubernetesTestsSuite.scala index c8894679d35..bc7c98a80c7 100644 --- a/integration-tests/kyuubi-kubernetes-it/src/test/scala/org/apache/kyuubi/kubernetes/test/deployment/KyuubiOnKubernetesTestsSuite.scala +++ b/integration-tests/kyuubi-kubernetes-it/src/test/scala/org/apache/kyuubi/kubernetes/test/deployment/KyuubiOnKubernetesTestsSuite.scala @@ -54,7 +54,7 @@ class KyuubiOnKubernetesWithSparkTestsBase extends WithKyuubiServerOnKubernetes super.connectionConf ++ Map( "spark.master" -> s"k8s://$miniKubeApiMaster", - "spark.kubernetes.container.image" -> "apache/spark:3.3.1", + "spark.kubernetes.container.image" -> "apache/spark:v3.3.2", "spark.executor.memory" -> "512M", "spark.driver.memory" -> "1024M", "spark.kubernetes.driver.request.cores" -> "250m", diff --git a/integration-tests/kyuubi-kubernetes-it/src/test/scala/org/apache/kyuubi/kubernetes/test/spark/SparkOnKubernetesTestsSuite.scala b/integration-tests/kyuubi-kubernetes-it/src/test/scala/org/apache/kyuubi/kubernetes/test/spark/SparkOnKubernetesTestsSuite.scala index e63c3704599..5141ff4d7ea 100644 --- a/integration-tests/kyuubi-kubernetes-it/src/test/scala/org/apache/kyuubi/kubernetes/test/spark/SparkOnKubernetesTestsSuite.scala +++ b/integration-tests/kyuubi-kubernetes-it/src/test/scala/org/apache/kyuubi/kubernetes/test/spark/SparkOnKubernetesTestsSuite.scala @@ -17,13 +17,16 @@ package org.apache.kyuubi.kubernetes.test.spark +import java.util.UUID + import scala.collection.JavaConverters._ import scala.concurrent.duration._ import org.apache.hadoop.conf.Configuration import org.apache.hadoop.net.NetUtils -import org.apache.kyuubi.{BatchTestHelper, KyuubiException, Logging, Utils, WithKyuubiServer, WithSimpleDFSService} +import org.apache.kyuubi._ +import org.apache.kyuubi.client.util.BatchUtils._ import org.apache.kyuubi.config.KyuubiConf import org.apache.kyuubi.config.KyuubiConf.FRONTEND_THRIFT_BINARY_BIND_HOST import org.apache.kyuubi.engine.{ApplicationInfo, ApplicationOperation, KubernetesApplicationOperation} @@ -31,7 +34,7 @@ import org.apache.kyuubi.engine.ApplicationState.{FAILED, NOT_FOUND, RUNNING} import org.apache.kyuubi.engine.spark.SparkProcessBuilder import org.apache.kyuubi.kubernetes.test.MiniKube import org.apache.kyuubi.operation.SparkQueryTests -import org.apache.kyuubi.session.{KyuubiBatchSessionImpl, KyuubiSessionManager} +import org.apache.kyuubi.session.KyuubiSessionManager import org.apache.kyuubi.util.Validator.KUBERNETES_EXECUTOR_POD_NAME_PREFIX import org.apache.kyuubi.zookeeper.ZookeeperConf.ZK_CLIENT_PORT_ADDRESS @@ -45,7 +48,7 @@ abstract class SparkOnKubernetesSuiteBase // TODO Support more Spark version // Spark official docker image: https://hub.docker.com/r/apache/spark/tags KyuubiConf().set("spark.master", s"k8s://$apiServerAddress") - .set("spark.kubernetes.container.image", "apache/spark:v3.2.1") + .set("spark.kubernetes.container.image", "apache/spark:v3.3.2") .set("spark.kubernetes.container.image.pullPolicy", "IfNotPresent") .set("spark.executor.instances", "1") .set("spark.executor.memory", "512M") @@ -122,6 +125,7 @@ class SparkClusterModeOnKubernetesSuite override protected def jdbcUrl: String = getJdbcUrl } +// [KYUUBI #4467] KubernetesApplicationOperator doesn't support client mode class KyuubiOperationKubernetesClusterClientModeSuite extends SparkClientModeOnKubernetesSuiteBase { private lazy val k8sOperation: KubernetesApplicationOperation = { @@ -133,8 +137,9 @@ class KyuubiOperationKubernetesClusterClientModeSuite private def sessionManager: KyuubiSessionManager = server.backendService.sessionManager.asInstanceOf[KyuubiSessionManager] - test("Spark Client Mode On Kubernetes Kyuubi KubernetesApplicationOperation Suite") { - val batchRequest = newSparkBatchRequest(conf.getAll) + ignore("Spark Client Mode On Kubernetes Kyuubi KubernetesApplicationOperation Suite") { + val batchRequest = newSparkBatchRequest(conf.getAll ++ Map( + KYUUBI_BATCH_ID_KEY -> UUID.randomUUID().toString)) val sessionHandle = sessionManager.openBatchSession( "kyuubi", @@ -193,7 +198,8 @@ class KyuubiOperationKubernetesClusterClusterModeSuite "spark.kubernetes.driver.pod.name", driverPodNamePrefix + "-" + System.currentTimeMillis()) - val batchRequest = newSparkBatchRequest(conf.getAll) + val batchRequest = newSparkBatchRequest(conf.getAll ++ Map( + KYUUBI_BATCH_ID_KEY -> UUID.randomUUID().toString)) val sessionHandle = sessionManager.openBatchSession( "runner", @@ -202,19 +208,18 @@ class KyuubiOperationKubernetesClusterClusterModeSuite batchRequest.getConf.asScala.toMap, batchRequest) - val session = sessionManager.getSession(sessionHandle).asInstanceOf[KyuubiBatchSessionImpl] - val batchJobSubmissionOp = session.batchJobSubmissionOp - - eventually(timeout(3.minutes), interval(50.milliseconds)) { - val appInfo = batchJobSubmissionOp.currentApplicationInfo - assert(appInfo.nonEmpty) - assert(appInfo.exists(_.state == RUNNING)) - assert(appInfo.exists(_.name.startsWith(driverPodNamePrefix))) + // wait for driver pod start + eventually(timeout(3.minutes), interval(5.second)) { + // trigger k8sOperation init here + val appInfo = k8sOperation.getApplicationInfoByTag(sessionHandle.identifier.toString) + assert(appInfo.state == RUNNING) + assert(appInfo.name.startsWith(driverPodNamePrefix)) } val killResponse = k8sOperation.killApplicationByTag(sessionHandle.identifier.toString) assert(killResponse._1) - assert(killResponse._2 startsWith "Operation of deleted appId:") + assert(killResponse._2 endsWith "is completed") + assert(killResponse._2 contains sessionHandle.identifier.toString) eventually(timeout(3.minutes), interval(50.milliseconds)) { val appInfo = k8sOperation.getApplicationInfoByTag(sessionHandle.identifier.toString) diff --git a/integration-tests/kyuubi-trino-it/pom.xml b/integration-tests/kyuubi-trino-it/pom.xml index e62e58d1d23..107d621b075 100644 --- a/integration-tests/kyuubi-trino-it/pom.xml +++ b/integration-tests/kyuubi-trino-it/pom.xml @@ -21,7 +21,7 @@ org.apache.kyuubi integration-tests - 1.7.0-SNAPSHOT + 1.8.0-SNAPSHOT ../pom.xml diff --git a/integration-tests/kyuubi-trino-it/src/test/scala/org/apache/kyuubi/it/trino/server/TrinoFrontendSuite.scala b/integration-tests/kyuubi-trino-it/src/test/scala/org/apache/kyuubi/it/trino/server/TrinoFrontendSuite.scala new file mode 100644 index 00000000000..4a175a28b7a --- /dev/null +++ b/integration-tests/kyuubi-trino-it/src/test/scala/org/apache/kyuubi/it/trino/server/TrinoFrontendSuite.scala @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.it.trino.server + +import scala.util.control.NonFatal + +import org.apache.kyuubi.WithKyuubiServer +import org.apache.kyuubi.config.KyuubiConf +import org.apache.kyuubi.operation.SparkMetadataTests + +/** + * This test is for Trino jdbc driver with Kyuubi Server and Spark engine: + * + * ------------------------------------------------------------- + * | JDBC | + * | Trino-driver ----> Kyuubi Server --> Spark Engine | + * | | + * ------------------------------------------------------------- + */ +class TrinoFrontendSuite extends WithKyuubiServer with SparkMetadataTests { + + test("execute statement - select 11 where 1=1") { + withJdbcStatement() { statement => + val resultSet = statement.executeQuery("SELECT 11 where 1<1") + while (resultSet.next()) { + assert(resultSet.getInt(1) === 11) + } + } + } + + test("execute preparedStatement - select 11 where 1 = 1") { + withJdbcPrepareStatement("select 11 where 1 = ? ") { statement => + statement.setInt(1, 1) + val rs = statement.executeQuery() + while (rs.next()) { + assert(rs.getInt(1) == 11) + } + } + } + + override protected val conf: KyuubiConf = { + KyuubiConf().set(KyuubiConf.FRONTEND_PROTOCOLS, Seq("TRINO")) + } + + override protected def jdbcUrl: String = { + s"jdbc:trino://${server.frontendServices.head.connectionUrl}/;" + } + + // trino jdbc driver requires enable SSL if specify password + override protected val password: String = "" + + override def beforeAll(): Unit = { + super.beforeAll() + // eagerly start spark engine before running test, it's a workaround for trino jdbc driver + // since it does not support changing http connect timeout + try { + withJdbcStatement() { statement => + statement.execute("SELECT 1") + } + } catch { + case NonFatal(e) => + } + } +} diff --git a/integration-tests/kyuubi-zookeeper-it/pom.xml b/integration-tests/kyuubi-zookeeper-it/pom.xml index eaeff5898a7..bded1585b71 100644 --- a/integration-tests/kyuubi-zookeeper-it/pom.xml +++ b/integration-tests/kyuubi-zookeeper-it/pom.xml @@ -21,7 +21,7 @@ org.apache.kyuubi integration-tests - 1.7.0-SNAPSHOT + 1.8.0-SNAPSHOT ../pom.xml diff --git a/integration-tests/pom.xml b/integration-tests/pom.xml index 4e3431afb90..b6a48daaedc 100644 --- a/integration-tests/pom.xml +++ b/integration-tests/pom.xml @@ -21,7 +21,7 @@ org.apache.kyuubi kyuubi-parent - 1.7.0-SNAPSHOT + 1.8.0-SNAPSHOT integration-tests diff --git a/kyuubi-assembly/pom.xml b/kyuubi-assembly/pom.xml index 725126f84f6..0524470a20d 100644 --- a/kyuubi-assembly/pom.xml +++ b/kyuubi-assembly/pom.xml @@ -22,7 +22,7 @@ org.apache.kyuubi kyuubi-parent - 1.7.0-SNAPSHOT + 1.8.0-SNAPSHOT ../pom.xml diff --git a/kyuubi-common/pom.xml b/kyuubi-common/pom.xml index fc259eb07d0..d62761d72b3 100644 --- a/kyuubi-common/pom.xml +++ b/kyuubi-common/pom.xml @@ -21,7 +21,7 @@ org.apache.kyuubi kyuubi-parent - 1.7.0-SNAPSHOT + 1.8.0-SNAPSHOT ../pom.xml @@ -31,6 +31,12 @@ https://kyuubi.apache.org/ + + com.vladsch.flexmark + flexmark-all + test + + org.scala-lang scala-library @@ -82,6 +88,11 @@ runtime + + org.antlr + ST4 + + org.apache.commons commons-lang3 @@ -135,6 +146,12 @@ test + + org.scalatestplus + mockito-4-6_${scala.binary.version} + test + + com.google.guava failureaccess diff --git a/kyuubi-common/src/main/java/org/apache/kyuubi/reflection/DynConstructors.java b/kyuubi-common/src/main/java/org/apache/kyuubi/reflection/DynConstructors.java index 7495ce0ffb4..59c79b88502 100644 --- a/kyuubi-common/src/main/java/org/apache/kyuubi/reflection/DynConstructors.java +++ b/kyuubi-common/src/main/java/org/apache/kyuubi/reflection/DynConstructors.java @@ -119,6 +119,7 @@ public static Builder builder(Class baseClass) { return new Builder(baseClass); } + @SuppressWarnings("rawtypes") public static class Builder { private final Class baseClass; private ClassLoader loader = Thread.currentThread().getContextClassLoader(); @@ -182,7 +183,7 @@ public Builder hiddenImpl(Class... types) { return this; } - @SuppressWarnings("unchecked") + @SuppressWarnings({"unchecked", "rawtypes"}) public Builder hiddenImpl(String className, Class... types) { // don't do any work if an implementation has been found if (ctor != null) { diff --git a/kyuubi-common/src/main/java/org/apache/kyuubi/reflection/DynFields.java b/kyuubi-common/src/main/java/org/apache/kyuubi/reflection/DynFields.java index 39c83b1621a..9430d54e9bb 100644 --- a/kyuubi-common/src/main/java/org/apache/kyuubi/reflection/DynFields.java +++ b/kyuubi-common/src/main/java/org/apache/kyuubi/reflection/DynFields.java @@ -300,6 +300,7 @@ public Builder hiddenImpl(String className, String fieldName) { * @see Class#forName(String) * @see Class#getField(String) */ + @SuppressWarnings("rawtypes") public Builder hiddenImpl(Class targetClass, String fieldName) { // don't do any work if an implementation has been found if (field != null || targetClass == null) { diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/Logging.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/Logging.scala index 4944b9fcc14..1df598132fb 100644 --- a/kyuubi-common/src/main/scala/org/apache/kyuubi/Logging.scala +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/Logging.scala @@ -22,7 +22,6 @@ import org.apache.logging.log4j.core.{Logger => Log4jLogger, LoggerContext} import org.apache.logging.log4j.core.config.DefaultConfiguration import org.slf4j.{Logger, LoggerFactory} import org.slf4j.bridge.SLF4JBridgeHandler -import org.slf4j.impl.StaticLoggerBinder import org.apache.kyuubi.util.ClassUtils @@ -54,12 +53,24 @@ trait Logging { } } + def debug(message: => Any, t: Throwable): Unit = { + if (logger.isDebugEnabled) { + logger.debug(message.toString, t) + } + } + def info(message: => Any): Unit = { if (logger.isInfoEnabled) { logger.info(message.toString) } } + def info(message: => Any, t: Throwable): Unit = { + if (logger.isInfoEnabled) { + logger.info(message.toString, t) + } + } + def warn(message: => Any): Unit = { if (logger.isWarnEnabled) { logger.warn(message.toString) @@ -105,16 +116,16 @@ object Logging { // This distinguishes the log4j 1.2 binding, currently // org.slf4j.impl.Log4jLoggerFactory, from the log4j 2.0 binding, currently // org.apache.logging.slf4j.Log4jLoggerFactory - val binderClass = StaticLoggerBinder.getSingleton.getLoggerFactoryClassStr - "org.slf4j.impl.Log4jLoggerFactory".equals(binderClass) + "org.slf4j.impl.Log4jLoggerFactory" + .equals(LoggerFactory.getILoggerFactory.getClass.getName) } private[kyuubi] def isLog4j2: Boolean = { // This distinguishes the log4j 1.2 binding, currently // org.slf4j.impl.Log4jLoggerFactory, from the log4j 2.0 binding, currently // org.apache.logging.slf4j.Log4jLoggerFactory - val binderClass = StaticLoggerBinder.getSingleton.getLoggerFactoryClassStr - "org.apache.logging.slf4j.Log4jLoggerFactory".equals(binderClass) + "org.apache.logging.slf4j.Log4jLoggerFactory" + .equals(LoggerFactory.getILoggerFactory.getClass.getName) } /** diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/Utils.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/Utils.scala index 33a4e116e95..3a03682ff1b 100644 --- a/kyuubi-common/src/main/scala/org/apache/kyuubi/Utils.scala +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/Utils.scala @@ -20,8 +20,10 @@ package org.apache.kyuubi import java.io._ import java.net.{Inet4Address, InetAddress, NetworkInterface} import java.nio.charset.StandardCharsets -import java.nio.file.{Files, Path, Paths} -import java.util.{Properties, TimeZone, UUID} +import java.nio.file.{Files, Path, Paths, StandardCopyOption} +import java.text.SimpleDateFormat +import java.util.{Date, Properties, TimeZone, UUID} +import java.util.concurrent.atomic.AtomicLong import scala.collection.JavaConverters._ import scala.sys.process._ @@ -40,6 +42,12 @@ object Utils extends Logging { import org.apache.kyuubi.config.KyuubiConf._ + /** + * An atomic counter used in writeToTempFile method + * avoiding duplication in temporary file name generation + */ + private lazy val tempFileIdCounter: AtomicLong = new AtomicLong(0) + def strToSeq(s: String, sp: String = ","): Seq[String] = { require(s != null) s.split(sp).map(_.trim).filter(_.nonEmpty) @@ -147,6 +155,50 @@ object Utils extends Logging { dir } + /** + * Copies bytes from an InputStream source to a newly created temporary file + * created in the directory destination. The temporary file will be created + * with new name by adding random identifiers before original file name's suffix, + * and the file will be deleted on JVM exit. The directories up to destination + * will be created if they don't already exist. destination will be overwritten + * if it already exists. The source stream is closed. + * @param source the InputStream to copy bytes from, must not be null, will be closed + * @param dir the directory path for temp file creation + * @param fileName original file name with suffix + * @return the created temp file in dir + */ + def writeToTempFile(source: InputStream, dir: Path, fileName: String): File = { + try { + if (source == null) { + throw new IOException("the source inputstream is null") + } + if (!dir.toFile.exists()) { + dir.toFile.mkdirs() + } + val (prefix, suffix) = fileName.lastIndexOf(".") match { + case i if i > 0 => (fileName.substring(0, i), fileName.substring(i)) + case _ => (fileName, "") + } + val currentTime = new SimpleDateFormat("yyyyMMddHHmmss").format(new Date()) + val identifier = s"$currentTime-${tempFileIdCounter.incrementAndGet()}" + val filePath = Paths.get(dir.toString, s"$prefix-$identifier$suffix") + try { + Files.copy(source, filePath, StandardCopyOption.REPLACE_EXISTING) + } finally { + source.close() + } + val file = filePath.toFile + file.deleteOnExit() + file + } catch { + case e: Exception => + error( + s"failed to write to temp file in path $dir, original file name: $fileName", + e) + throw e + } + } + def currentUser: String = UserGroupInformation.getCurrentUser.getShortUserName private val shortVersionRegex = """^(\d+\.\d+\.\d+)(.*)?$""".r @@ -169,6 +221,11 @@ object Utils extends Logging { */ val isWindows: Boolean = SystemUtils.IS_OS_WINDOWS + /** + * Whether the underlying operating system is MacOS. + */ + val isMac: Boolean = SystemUtils.IS_OS_MAC + /** * Indicates whether Kyuubi is currently running unit tests. */ @@ -335,4 +392,19 @@ object Utils extends Logging { Option(Thread.currentThread().getContextClassLoader).getOrElse(getKyuubiClassLoader) def isOnK8s: Boolean = Files.exists(Paths.get("/var/run/secrets/kubernetes.io")) + + /** + * Return a nice string representation of the exception. It will call "printStackTrace" to + * recursively generate the stack trace including the exception and its causes. + */ + def prettyPrint(e: Throwable): String = { + if (e == null) { + "" + } else { + // Use e.printStackTrace here because e.getStackTrace doesn't include the cause + val stringWriter = new StringWriter() + e.printStackTrace(new PrintWriter(stringWriter)) + stringWriter.toString + } + } } diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/config/KyuubiConf.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/config/KyuubiConf.scala index 0e50e132ef7..b5229e2ad4f 100644 --- a/kyuubi-common/src/main/scala/org/apache/kyuubi/config/KyuubiConf.scala +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/config/KyuubiConf.scala @@ -284,7 +284,7 @@ object KyuubiConf { .createOptional val KINIT_INTERVAL: ConfigEntry[Long] = buildConf("kyuubi.kinit.interval") - .doc("How often will Kyuubi server run `kinit -kt [keytab] [principal]` to renew the" + + .doc("How often will the Kyuubi server run `kinit -kt [keytab] [principal]` to renew the" + " local Kerberos credentials cache") .version("1.0.0") .serverOnly @@ -320,7 +320,7 @@ object KyuubiConf { val CREDENTIALS_UPDATE_WAIT_TIMEOUT: ConfigEntry[Long] = buildConf("kyuubi.credentials.update.wait.timeout") - .doc("How long to wait until credentials are ready.") + .doc("How long to wait until the credentials are ready.") .version("1.5.0") .timeConf .checkValue(t => t > 0, "must be positive integer") @@ -336,7 +336,7 @@ object KyuubiConf { val CREDENTIALS_IDLE_TIMEOUT: ConfigEntry[Long] = buildConf("kyuubi.credentials.idle.timeout") - .doc("inactive users' credentials will be expired after a configured timeout") + .doc("The inactive users' credentials will be expired after a configured timeout") .version("1.6.0") .timeConf .checkValue(_ >= Duration.ofSeconds(3).toMillis, "Minimum 3 seconds") @@ -376,7 +376,7 @@ object KyuubiConf { val FRONTEND_PROTOCOLS: ConfigEntry[Seq[String]] = buildConf("kyuubi.frontend.protocols") - .doc("A comma separated list for all frontend protocols " + + .doc("A comma-separated list for all frontend protocols " + "
      " + "
    • THRIFT_BINARY - HiveServer2 compatible thrift binary protocol.
    • " + "
    • THRIFT_HTTP - HiveServer2 compatible thrift http protocol.
    • " + @@ -391,7 +391,9 @@ object KyuubiConf { .checkValue( _.forall(FrontendProtocols.values.map(_.toString).contains), s"the frontend protocol should be one or more of ${FrontendProtocols.values.mkString(",")}") - .createWithDefault(Seq(FrontendProtocols.THRIFT_BINARY.toString)) + .createWithDefault(Seq( + FrontendProtocols.THRIFT_BINARY.toString, + FrontendProtocols.REST.toString)) val FRONTEND_BIND_HOST: OptionalConfigEntry[String] = buildConf("kyuubi.frontend.bind.host") .doc("Hostname or IP of the machine on which to run the frontend services.") @@ -403,7 +405,7 @@ object KyuubiConf { val FRONTEND_THRIFT_BINARY_BIND_HOST: ConfigEntry[Option[String]] = buildConf("kyuubi.frontend.thrift.binary.bind.host") .doc("Hostname or IP of the machine on which to run the thrift frontend service " + - "via binary protocol.") + "via the binary protocol.") .version("1.4.0") .serverOnly .fallbackConf(FRONTEND_BIND_HOST) @@ -454,7 +456,7 @@ object KyuubiConf { val FRONTEND_THRIFT_BINARY_SSL_INCLUDE_CIPHER_SUITES: ConfigEntry[Seq[String]] = buildConf("kyuubi.frontend.thrift.binary.ssl.include.ciphersuites") - .doc("A comma separated list of include SSL cipher suite names for thrift binary frontend.") + .doc("A comma-separated list of include SSL cipher suite names for thrift binary frontend.") .version("1.7.0") .stringConf .toSequence() @@ -463,7 +465,7 @@ object KyuubiConf { @deprecated("using kyuubi.frontend.thrift.binary.bind.port instead", "1.4.0") val FRONTEND_BIND_PORT: ConfigEntry[Int] = buildConf("kyuubi.frontend.bind.port") .doc("(deprecated) Port of the machine on which to run the thrift frontend service " + - "via binary protocol.") + "via the binary protocol.") .version("1.0.0") .serverOnly .intConf @@ -472,7 +474,8 @@ object KyuubiConf { val FRONTEND_THRIFT_BINARY_BIND_PORT: ConfigEntry[Int] = buildConf("kyuubi.frontend.thrift.binary.bind.port") - .doc("Port of the machine on which to run the thrift frontend service via binary protocol.") + .doc("Port of the machine on which to run the thrift frontend service " + + "via the binary protocol.") .version("1.4.0") .serverOnly .fallbackConf(FRONTEND_BIND_PORT) @@ -496,7 +499,7 @@ object KyuubiConf { val FRONTEND_MIN_WORKER_THREADS: ConfigEntry[Int] = buildConf("kyuubi.frontend.min.worker.threads") - .doc("(deprecated) Minimum number of threads in the of frontend worker thread pool for " + + .doc("(deprecated) Minimum number of threads in the frontend worker thread pool for " + "the thrift frontend service") .version("1.0.0") .intConf @@ -504,14 +507,14 @@ object KyuubiConf { val FRONTEND_THRIFT_MIN_WORKER_THREADS: ConfigEntry[Int] = buildConf("kyuubi.frontend.thrift.min.worker.threads") - .doc("Minimum number of threads in the of frontend worker thread pool for the thrift " + + .doc("Minimum number of threads in the frontend worker thread pool for the thrift " + "frontend service") .version("1.4.0") .fallbackConf(FRONTEND_MIN_WORKER_THREADS) val FRONTEND_MAX_WORKER_THREADS: ConfigEntry[Int] = buildConf("kyuubi.frontend.max.worker.threads") - .doc("(deprecated) Maximum number of threads in the of frontend worker thread pool for " + + .doc("(deprecated) Maximum number of threads in the frontend worker thread pool for " + "the thrift frontend service") .version("1.0.0") .intConf @@ -519,14 +522,14 @@ object KyuubiConf { val FRONTEND_THRIFT_MAX_WORKER_THREADS: ConfigEntry[Int] = buildConf("kyuubi.frontend.thrift.max.worker.threads") - .doc("Maximum number of threads in the of frontend worker thread pool for the thrift " + + .doc("Maximum number of threads in the frontend worker thread pool for the thrift " + "frontend service") .version("1.4.0") .fallbackConf(FRONTEND_MAX_WORKER_THREADS) val FRONTEND_REST_MAX_WORKER_THREADS: ConfigEntry[Int] = buildConf("kyuubi.frontend.rest.max.worker.threads") - .doc("Maximum number of threads in the of frontend worker thread pool for the rest " + + .doc("Maximum number of threads in the frontend worker thread pool for the rest " + "frontend service") .version("1.6.2") .fallbackConf(FRONTEND_MAX_WORKER_THREADS) @@ -624,7 +627,7 @@ object KyuubiConf { val FRONTEND_THRIFT_HTTP_COOKIE_AUTH_ENABLED: ConfigEntry[Boolean] = buildConf("kyuubi.frontend.thrift.http.cookie.auth.enabled") .doc("When true, Kyuubi in HTTP transport mode, " + - "will use cookie based authentication mechanism") + "will use cookie-based authentication mechanism") .version("1.6.0") .booleanConf .createWithDefault(true) @@ -659,7 +662,7 @@ object KyuubiConf { val FRONTEND_THRIFT_HTTP_XSRF_FILTER_ENABLED: ConfigEntry[Boolean] = buildConf("kyuubi.frontend.thrift.http.xsrf.filter.enabled") - .doc("If enabled, Kyuubi will block any requests made to it over http " + + .doc("If enabled, Kyuubi will block any requests made to it over HTTP " + "if an X-XSRF-HEADER header is not present") .version("1.6.0") .booleanConf @@ -699,7 +702,7 @@ object KyuubiConf { val FRONTEND_THRIFT_HTTP_SSL_EXCLUDE_CIPHER_SUITES: ConfigEntry[Seq[String]] = buildConf("kyuubi.frontend.thrift.http.ssl.exclude.ciphersuites") - .doc("A comma separated list of exclude SSL cipher suite names for thrift http frontend.") + .doc("A comma-separated list of exclude SSL cipher suite names for thrift http frontend.") .version("1.7.0") .stringConf .toSequence() @@ -715,18 +718,19 @@ object KyuubiConf { val FRONTEND_PROXY_HTTP_CLIENT_IP_HEADER: ConfigEntry[String] = buildConf("kyuubi.frontend.proxy.http.client.ip.header") - .doc("The http header to record the real client ip address. If your server is behind a load" + + .doc("The HTTP header to record the real client IP address. If your server is behind a load" + " balancer or other proxy, the server will see this load balancer or proxy IP address as" + " the client IP address, to get around this common issue, most load balancers or proxies" + " offer the ability to record the real remote IP address in an HTTP header that will be" + " added to the request for other devices to use. Note that, because the header value can" + - " be specified to any ip address, so it will not be used for authentication.") + " be specified to any IP address, so it will not be used for authentication.") .version("1.6.0") .stringConf .createWithDefault("X-Real-IP") val AUTHENTICATION_METHOD: ConfigEntry[Seq[String]] = buildConf("kyuubi.authentication") - .doc("A comma separated list of client authentication types.
        " + + .doc("A comma-separated list of client authentication types." + + "
          " + "
        • NOSASL: raw transport.
        • " + "
        • NONE: no authentication check.
        • " + "
        • KERBEROS: Kerberos/GSSAPI authentication.
        • " + @@ -734,11 +738,28 @@ object KyuubiConf { "
        • JDBC: JDBC query authentication.
        • " + "
        • LDAP: Lightweight Directory Access Protocol authentication.
        • " + "
        " + - " Note that: For KERBEROS, it is SASL/GSSAPI mechanism," + - " and for NONE, CUSTOM and LDAP, they are all SASL/PLAIN mechanism." + - " If only NOSASL is specified, the authentication will be NOSASL." + - " For SASL authentication, KERBEROS and PLAIN auth type are supported at the same time," + - " and only the first specified PLAIN auth type is valid.") + "The following tree describes the catalog of each option." + + "
          " + + "
        • NOSASL
        • " + + "
        • SASL" + + "
            " + + "
          • SASL/PLAIN
          • " + + "
              " + + "
            • NONE
            • " + + "
            • LDAP
            • " + + "
            • JDBC
            • " + + "
            • CUSTOM
            • " + + "
            " + + "
          • SASL/GSSAPI" + + "
              " + + "
            • KERBEROS
            • " + + "
            " + + "
          • " + + "
          " + + "
        • " + + "
        " + + " Note that: for SASL authentication, KERBEROS and PLAIN auth types are supported" + + " at the same time, and only the first specified PLAIN auth type is valid.") .version("1.0.0") .serverOnly .stringConf @@ -754,6 +775,7 @@ object KyuubiConf { .doc("User-defined authentication implementation of " + "org.apache.kyuubi.service.authentication.PasswdAuthenticationProvider") .version("1.3.0") + .serverOnly .stringConf .createOptional @@ -761,13 +783,16 @@ object KyuubiConf { buildConf("kyuubi.authentication.ldap.url") .doc("SPACE character separated LDAP connection URL(s).") .version("1.0.0") + .serverOnly .stringConf .createOptional - val AUTHENTICATION_LDAP_BASEDN: OptionalConfigEntry[String] = - buildConf("kyuubi.authentication.ldap.base.dn") + val AUTHENTICATION_LDAP_BASE_DN: OptionalConfigEntry[String] = + buildConf("kyuubi.authentication.ldap.baseDN") + .withAlternative("kyuubi.authentication.ldap.base.dn") .doc("LDAP base DN.") - .version("1.0.0") + .version("1.7.0") + .serverOnly .stringConf .createOptional @@ -775,21 +800,129 @@ object KyuubiConf { buildConf("kyuubi.authentication.ldap.domain") .doc("LDAP domain.") .version("1.0.0") + .serverOnly .stringConf .createOptional - val AUTHENTICATION_LDAP_GUIDKEY: ConfigEntry[String] = + val AUTHENTICATION_LDAP_GROUP_DN_PATTERN: OptionalConfigEntry[String] = + buildConf("kyuubi.authentication.ldap.groupDNPattern") + .doc("COLON-separated list of patterns to use to find DNs for group entities in " + + "this directory. Use %s where the actual group name is to be substituted for. " + + "For example: CN=%s,CN=Groups,DC=subdomain,DC=domain,DC=com.") + .version("1.7.0") + .serverOnly + .stringConf + .createOptional + + val AUTHENTICATION_LDAP_USER_DN_PATTERN: OptionalConfigEntry[String] = + buildConf("kyuubi.authentication.ldap.userDNPattern") + .doc("COLON-separated list of patterns to use to find DNs for users in this directory. " + + "Use %s where the actual group name is to be substituted for. " + + "For example: CN=%s,CN=Users,DC=subdomain,DC=domain,DC=com.") + .version("1.7.0") + .serverOnly + .stringConf + .createOptional + + val AUTHENTICATION_LDAP_GROUP_FILTER: ConfigEntry[Seq[String]] = + buildConf("kyuubi.authentication.ldap.groupFilter") + .doc("COMMA-separated list of LDAP Group names (short name not full DNs). " + + "For example: HiveAdmins,HadoopAdmins,Administrators") + .version("1.7.0") + .serverOnly + .stringConf + .toSequence() + .createWithDefault(Nil) + + val AUTHENTICATION_LDAP_USER_FILTER: ConfigEntry[Seq[String]] = + buildConf("kyuubi.authentication.ldap.userFilter") + .doc("COMMA-separated list of LDAP usernames (just short names, not full DNs). " + + "For example: hiveuser,impalauser,hiveadmin,hadoopadmin") + .version("1.7.0") + .serverOnly + .stringConf + .toSequence() + .createWithDefault(Nil) + + val AUTHENTICATION_LDAP_GUID_KEY: ConfigEntry[String] = buildConf("kyuubi.authentication.ldap.guidKey") - .doc("LDAP attribute name whose values are unique in this LDAP server." + - "For example:uid or cn.") + .doc("LDAP attribute name whose values are unique in this LDAP server. " + + "For example: uid or CN.") .version("1.2.0") + .serverOnly .stringConf .createWithDefault("uid") + val AUTHENTICATION_LDAP_GROUP_MEMBERSHIP_KEY: ConfigEntry[String] = + buildConf("kyuubi.authentication.ldap.groupMembershipKey") + .doc("LDAP attribute name on the group object that contains the list of distinguished " + + "names for the user, group, and contact objects that are members of the group. " + + "For example: member, uniqueMember or memberUid") + .version("1.7.0") + .serverOnly + .stringConf + .createWithDefault("member") + + val AUTHENTICATION_LDAP_USER_MEMBERSHIP_KEY: OptionalConfigEntry[String] = + buildConf("kyuubi.authentication.ldap.userMembershipKey") + .doc("LDAP attribute name on the user object that contains groups of which the user is " + + "a direct member, except for the primary group, which is represented by the " + + "primaryGroupId. For example: memberOf") + .version("1.7.0") + .serverOnly + .stringConf + .createOptional + + val AUTHENTICATION_LDAP_GROUP_CLASS_KEY: ConfigEntry[String] = + buildConf("kyuubi.authentication.ldap.groupClassKey") + .doc("LDAP attribute name on the group entry that is to be used in LDAP group searches. " + + "For example: group, groupOfNames or groupOfUniqueNames.") + .version("1.7.0") + .serverOnly + .stringConf + .createWithDefault("groupOfNames") + + val AUTHENTICATION_LDAP_CUSTOM_LDAP_QUERY: OptionalConfigEntry[String] = + buildConf("kyuubi.authentication.ldap.customLDAPQuery") + .doc("A full LDAP query that LDAP Atn provider uses to execute against LDAP Server. " + + "If this query returns a null resultset, the LDAP Provider fails the Authentication " + + "request, succeeds if the user is part of the resultset." + + "For example: `(&(objectClass=group)(objectClass=top)(instanceType=4)(cn=Domain*))`, " + + "`(&(objectClass=person)(|(sAMAccountName=admin)" + + "(|(memberOf=CN=Domain Admins,CN=Users,DC=domain,DC=com)" + + "(memberOf=CN=Administrators,CN=Builtin,DC=domain,DC=com))))`") + .version("1.7.0") + .serverOnly + .stringConf + .createOptional + + val AUTHENTICATION_LDAP_BIND_USER: OptionalConfigEntry[String] = + buildConf("kyuubi.authentication.ldap.binddn") + .doc("The user with which to bind to the LDAP server, and search for the full domain name " + + "of the user being authenticated. This should be the full domain name of the user, and " + + "should have search access across all users in the LDAP tree. If not specified, then " + + "the user being authenticated will be used as the bind user. " + + "For example: CN=bindUser,CN=Users,DC=subdomain,DC=domain,DC=com") + .version("1.7.0") + .serverOnly + .stringConf + .createOptional + + val AUTHENTICATION_LDAP_BIND_PASSWORD: OptionalConfigEntry[String] = + buildConf("kyuubi.authentication.ldap.bindpw") + .doc("The password for the bind user, to be used to search for the full name of the " + + "user being authenticated. If the username is specified, this parameter must also be " + + "specified.") + .version("1.7.0") + .serverOnly + .stringConf + .createOptional + val AUTHENTICATION_JDBC_DRIVER: OptionalConfigEntry[String] = buildConf("kyuubi.authentication.jdbc.driver.class") .doc("Driver class name for JDBC Authentication Provider.") .version("1.6.0") + .serverOnly .stringConf .createOptional @@ -797,6 +930,7 @@ object KyuubiConf { buildConf("kyuubi.authentication.jdbc.url") .doc("JDBC URL for JDBC Authentication Provider.") .version("1.6.0") + .serverOnly .stringConf .createOptional @@ -804,6 +938,7 @@ object KyuubiConf { buildConf("kyuubi.authentication.jdbc.user") .doc("Database user for JDBC Authentication Provider.") .version("1.6.0") + .serverOnly .stringConf .createOptional @@ -811,6 +946,7 @@ object KyuubiConf { buildConf("kyuubi.authentication.jdbc.password") .doc("Database password for JDBC Authentication Provider.") .version("1.6.0") + .serverOnly .stringConf .createOptional @@ -822,6 +958,7 @@ object KyuubiConf { "The SQL statement must start with the `SELECT` clause. " + "Available placeholders are `${user}` and `${password}`.") .version("1.6.0") + .serverOnly .stringConf .createOptional @@ -860,6 +997,7 @@ object KyuubiConf { "
      • auth-conf - authentication plus integrity and confidentiality protection. This is" + " applicable only if Kyuubi is configured to use Kerberos authentication.
      ") .version("1.0.0") + .serverOnly .stringConf .checkValues(SaslQOP.values.map(_.toString)) .transform(_.toLowerCase(Locale.ROOT)) @@ -954,14 +1092,14 @@ object KyuubiConf { val FRONTEND_TRINO_MAX_WORKER_THREADS: ConfigEntry[Int] = buildConf("kyuubi.frontend.trino.max.worker.threads") - .doc("Maximum number of threads in the of frontend worker thread pool for the trino " + + .doc("Maximum number of threads in the frontend worker thread pool for the Trino " + "frontend service") .version("1.7.0") .fallbackConf(FRONTEND_MAX_WORKER_THREADS) val KUBERNETES_CONTEXT: OptionalConfigEntry[String] = buildConf("kyuubi.kubernetes.context") - .doc("The desired context from your kubernetes config file used to configure the K8S " + + .doc("The desired context from your kubernetes config file used to configure the K8s " + "client for interacting with the cluster.") .version("1.6.0") .stringConf @@ -993,8 +1131,8 @@ object KyuubiConf { val KUBERNETES_AUTHENTICATE_OAUTH_TOKEN: OptionalConfigEntry[String] = buildConf("kyuubi.kubernetes.authenticate.oauthToken") .doc("The OAuth token to use when authenticating against the Kubernetes API server. " + - "Note that unlike the other authentication options, this must be the exact string value " + - "of the token to use for the authentication.") + "Note that unlike, the other authentication options, this must be the exact string value" + + " of the token to use for the authentication.") .version("1.7.0") .stringConf .createOptional @@ -1033,14 +1171,23 @@ object KyuubiConf { .booleanConf .createWithDefault(false) + val KUBERNETES_TERMINATED_APPLICATION_RETAIN_PERIOD: ConfigEntry[Long] = + buildConf("kyuubi.kubernetes.terminatedApplicationRetainPeriod") + .doc("The period for which the Kyuubi server retains application information after " + + "the application terminates.") + .version("1.7.1") + .timeConf + .checkValue(_ > 0, "must be positive number") + .createWithDefault(Duration.ofMinutes(5).toMillis) + // /////////////////////////////////////////////////////////////////////////////////////////////// // SQL Engine Configuration // // /////////////////////////////////////////////////////////////////////////////////////////////// val ENGINE_ERROR_MAX_SIZE: ConfigEntry[Int] = buildConf("kyuubi.session.engine.startup.error.max.size") - .doc("During engine bootstrapping, if error occurs, using this config to limit the length" + - " error message(characters).") + .doc("During engine bootstrapping, if anderror occurs, using this config to limit" + + " the length of error message(characters).") .version("1.1.0") .intConf .checkValue(v => v >= 200 && v <= 8192, s"must in [200, 8192]") @@ -1067,7 +1214,7 @@ object KyuubiConf { .doc("Specify a profile to load session-level configurations from " + "`$KYUUBI_CONF_DIR/kyuubi-session-.conf`. " + "This configuration will be ignored if the file does not exist. " + - "This configuration only has effect when `kyuubi.session.conf.advisor` " + + "This configuration only takes effect when `kyuubi.session.conf.advisor` " + "is set as `org.apache.kyuubi.session.FileSessionConfAdvisor`.") .version("1.7.0") .stringConf @@ -1084,7 +1231,7 @@ object KyuubiConf { val ENGINE_SPARK_MAX_LIFETIME: ConfigEntry[Long] = buildConf("kyuubi.session.engine.spark.max.lifetime") - .doc("Max lifetime for spark engine, the engine will self-terminate when it reaches the" + + .doc("Max lifetime for Spark engine, the engine will self-terminate when it reaches the" + " end of life. 0 or negative means not to self-terminate.") .version("1.6.0") .timeConf @@ -1100,7 +1247,7 @@ object KyuubiConf { val ENGINE_FLINK_MAX_ROWS: ConfigEntry[Int] = buildConf("kyuubi.session.engine.flink.max.rows") - .doc("Max rows of Flink query results. For batch queries, rows that exceeds the limit " + + .doc("Max rows of Flink query results. For batch queries, rows exceeding the limit " + "would be ignored. For streaming queries, the query would be canceled if the limit " + "is reached.") .version("1.5.0") @@ -1117,28 +1264,28 @@ object KyuubiConf { val ENGINE_TRINO_CONNECTION_URL: OptionalConfigEntry[String] = buildConf("kyuubi.session.engine.trino.connection.url") - .doc("The server url that trino engine will connect to") + .doc("The server url that Trino engine will connect to") .version("1.5.0") .stringConf .createOptional val ENGINE_TRINO_CONNECTION_CATALOG: OptionalConfigEntry[String] = buildConf("kyuubi.session.engine.trino.connection.catalog") - .doc("The default catalog that trino engine will connect to") + .doc("The default catalog that Trino engine will connect to") .version("1.5.0") .stringConf .createOptional val ENGINE_TRINO_SHOW_PROGRESS: ConfigEntry[Boolean] = buildConf("kyuubi.session.engine.trino.showProgress") - .doc("When true, show the progress bar and final info in the trino engine log.") + .doc("When true, show the progress bar and final info in the Trino engine log.") .version("1.6.0") .booleanConf .createWithDefault(true) val ENGINE_TRINO_SHOW_PROGRESS_DEBUG: ConfigEntry[Boolean] = buildConf("kyuubi.session.engine.trino.showProgress.debug") - .doc("When true, show the progress debug info in the trino engine log.") + .doc("When true, show the progress debug info in the Trino engine log.") .version("1.6.0") .booleanConf .createWithDefault(false) @@ -1160,7 +1307,7 @@ object KyuubiConf { val ENGINE_ALIVE_PROBE_ENABLED: ConfigEntry[Boolean] = buildConf("kyuubi.session.engine.alive.probe.enabled") .doc("Whether to enable the engine alive probe, it true, we will create a companion thrift" + - " client that sends simple request to check whether the engine is keep alive.") + " client that keeps sending simple requests to check whether the engine is alive.") .version("1.6.0") .booleanConf .createWithDefault(false) @@ -1189,7 +1336,7 @@ object KyuubiConf { val ENGINE_OPEN_RETRY_WAIT: ConfigEntry[Long] = buildConf("kyuubi.session.engine.open.retry.wait") - .doc("How long to wait before retrying to open engine after a failure.") + .doc("How long to wait before retrying to open the engine after failure.") .version("1.7.0") .timeConf .createWithDefault(Duration.ofSeconds(10).toMillis) @@ -1220,6 +1367,14 @@ object KyuubiConf { .version("1.2.0") .fallbackConf(SESSION_TIMEOUT) + val SESSION_CLOSE_ON_DISCONNECT: ConfigEntry[Boolean] = + buildConf("kyuubi.session.close.on.disconnect") + .doc("Session will be closed when client disconnects from kyuubi gateway. " + + "Set this to false to have session outlive its parent connection.") + .version("1.8.0") + .booleanConf + .createWithDefault(true) + val BATCH_SESSION_IDLE_TIMEOUT: ConfigEntry[Long] = buildConf("kyuubi.batch.session.idle.timeout") .doc("Batch session idle timeout, it will be closed when it's not accessed for this duration") .version("1.6.2") @@ -1241,7 +1396,7 @@ object KyuubiConf { val SESSION_CONF_IGNORE_LIST: ConfigEntry[Seq[String]] = buildConf("kyuubi.session.conf.ignore.list") - .doc("A comma separated list of ignored keys. If the client connection contains any of" + + .doc("A comma-separated list of ignored keys. If the client connection contains any of" + " them, the key and the corresponding value will be removed silently during engine" + " bootstrap and connection setup." + " Note that this rule is for server-side protection defined via administrators to" + @@ -1254,7 +1409,7 @@ object KyuubiConf { val SESSION_CONF_RESTRICT_LIST: ConfigEntry[Seq[String]] = buildConf("kyuubi.session.conf.restrict.list") - .doc("A comma separated list of restricted keys. If the client connection contains any of" + + .doc("A comma-separated list of restricted keys. If the client connection contains any of" + " them, the connection will be rejected explicitly during engine bootstrap and connection" + " setup." + " Note that this rule is for server-side protection defined via administrators to" + @@ -1268,15 +1423,16 @@ object KyuubiConf { val SESSION_USER_SIGN_ENABLED: ConfigEntry[Boolean] = buildConf("kyuubi.session.user.sign.enabled") .doc("Whether to verify the integrity of session user name" + - " on engine side, e.g. Authz plugin in Spark.") + " on the engine side, e.g. Authz plugin in Spark.") .version("1.7.0") .booleanConf .createWithDefault(false) val SESSION_ENGINE_STARTUP_MAX_LOG_LINES: ConfigEntry[Int] = buildConf("kyuubi.session.engine.startup.maxLogLines") - .doc("The maximum number of engine log lines when errors occur during engine startup phase." + - " Note that this max lines is for client-side to help track engine startup issue.") + .doc("The maximum number of engine log lines when errors occur during the engine" + + " startup phase. Note that this config effects on client-side to" + + " help track engine startup issues.") .version("1.4.0") .intConf .checkValue(_ > 0, "the maximum must be positive integer.") @@ -1284,17 +1440,17 @@ object KyuubiConf { val SESSION_ENGINE_STARTUP_WAIT_COMPLETION: ConfigEntry[Boolean] = buildConf("kyuubi.session.engine.startup.waitCompletion") - .doc("Whether to wait for completion after engine starts." + + .doc("Whether to wait for completion after the engine starts." + " If false, the startup process will be destroyed after the engine is started." + " Note that only use it when the driver is not running locally," + - " such as yarn-cluster mode; Otherwise, the engine will be killed.") + " such as in yarn-cluster mode; Otherwise, the engine will be killed.") .version("1.5.0") .booleanConf .createWithDefault(true) val SESSION_ENGINE_LAUNCH_ASYNC: ConfigEntry[Boolean] = buildConf("kyuubi.session.engine.launch.async") - .doc("When opening kyuubi session, whether to launch backend engine asynchronously." + + .doc("When opening kyuubi session, whether to launch the backend engine asynchronously." + " When true, the Kyuubi server will set up the connection with the client without delay" + " as the backend engine will be created asynchronously.") .version("1.4.0") @@ -1303,11 +1459,12 @@ object KyuubiConf { val SESSION_LOCAL_DIR_ALLOW_LIST: ConfigEntry[Seq[String]] = buildConf("kyuubi.session.local.dir.allow.list") - .doc("The local dir list that are allowed to access by the kyuubi session application. User" + - " might set some parameters such as `spark.files` and it will upload some local files" + - " when launching the kyuubi engine, if the local dir allow list is defined, kyuubi will" + + .doc("The local dir list that are allowed to access by the kyuubi session application. " + + " End-users might set some parameters such as `spark.files` and it will " + + " upload some local files when launching the kyuubi engine," + + " if the local dir allow list is defined, kyuubi will" + " check whether the path to upload is in the allow list. Note that, if it is empty, there" + - " is no limitation for that and please use absolute path list.") + " is no limitation for that. And please use absolute paths.") .version("1.6.0") .serverOnly .stringConf @@ -1332,14 +1489,14 @@ object KyuubiConf { val BATCH_CONF_IGNORE_LIST: ConfigEntry[Seq[String]] = buildConf("kyuubi.batch.conf.ignore.list") - .doc("A comma separated list of ignored keys for batch conf. If the batch conf contains" + + .doc("A comma-separated list of ignored keys for batch conf. If the batch conf contains" + " any of them, the key and the corresponding value will be removed silently during batch" + " job submission." + " Note that this rule is for server-side protection defined via administrators to" + " prevent some essential configs from tampering." + - " You can also pre-define some config for batch job submission with prefix:" + + " You can also pre-define some config for batch job submission with the prefix:" + " kyuubi.batchConf.[batchType]. For example, you can pre-define `spark.master`" + - " for spark batch job with key `kyuubi.batchConf.spark.spark.master`.") + " for the Spark batch job with key `kyuubi.batchConf.spark.spark.master`.") .version("1.6.0") .stringConf .toSequence() @@ -1373,6 +1530,14 @@ object KyuubiConf { .timeConf .createWithDefault(Duration.ofSeconds(5).toMillis) + val BATCH_RESOURCE_UPLOAD_ENABLED: ConfigEntry[Boolean] = + buildConf("kyuubi.batch.resource.upload.enabled") + .internal + .doc("Whether to enable Kyuubi batch resource upload function.") + .version("1.7.1") + .booleanConf + .createWithDefault(true) + val SERVER_EXEC_POOL_SIZE: ConfigEntry[Int] = buildConf("kyuubi.backend.server.exec.pool.size") .doc("Number of threads in the operation execution thread pool of Kyuubi server") @@ -1403,14 +1568,14 @@ object KyuubiConf { val METADATA_CLEANER_ENABLED: ConfigEntry[Boolean] = buildConf("kyuubi.metadata.cleaner.enabled") .doc("Whether to clean the metadata periodically. If it is enabled, Kyuubi will clean the" + - " metadata that is in terminate state with max age limitation.") + " metadata that is in the terminate state with max age limitation.") .version("1.6.0") .booleanConf .createWithDefault(true) val METADATA_MAX_AGE: ConfigEntry[Long] = buildConf("kyuubi.metadata.max.age") - .doc("The maximum age of metadata, the metadata that exceeds the age will be cleaned.") + .doc("The maximum age of metadata, the metadata exceeding the age will be cleaned.") .version("1.6.0") .timeConf .createWithDefault(Duration.ofDays(3).toMillis) @@ -1424,16 +1589,8 @@ object KyuubiConf { val METADATA_RECOVERY_THREADS: ConfigEntry[Int] = buildConf("kyuubi.metadata.recovery.threads") - .doc("The number of threads for recovery from metadata store when Kyuubi server restarting.") - .version("1.6.0") - .intConf - .createWithDefault(10) - - val METADATA_REQUEST_RETRY_THREADS: ConfigEntry[Int] = - buildConf("kyuubi.metadata.request.retry.threads") - .doc("Number of threads in the metadata request retry manager thread pool. The metadata" + - " store might be unavailable sometimes and the requests will fail, to tolerant for this" + - " case and unblock the main thread, we support to retry the failed requests in async way.") + .doc("The number of threads for recovery from the metadata store " + + "when the Kyuubi server restarts.") .version("1.6.0") .intConf .createWithDefault(10) @@ -1445,10 +1602,31 @@ object KyuubiConf { .timeConf .createWithDefault(Duration.ofSeconds(5).toMillis) - val METADATA_REQUEST_RETRY_QUEUE_SIZE: ConfigEntry[Int] = - buildConf("kyuubi.metadata.request.retry.queue.size") + val METADATA_REQUEST_ASYNC_RETRY_ENABLED: ConfigEntry[Boolean] = + buildConf("kyuubi.metadata.request.async.retry.enabled") + .doc("Whether to retry in async when metadata request failed. When true, return " + + "success response immediately even the metadata request failed, and schedule " + + "it in background until success, to tolerate long-time metadata store outages " + + "w/o blocking the submission request.") + .version("1.7.0") + .booleanConf + .createWithDefault(true) + + val METADATA_REQUEST_ASYNC_RETRY_THREADS: ConfigEntry[Int] = + buildConf("kyuubi.metadata.request.async.retry.threads") + .withAlternative("kyuubi.metadata.request.retry.threads") + .doc("Number of threads in the metadata request async retry manager thread pool. Only " + + s"take affect when ${METADATA_REQUEST_ASYNC_RETRY_ENABLED.key} is `true`.") + .version("1.6.0") + .intConf + .createWithDefault(10) + + val METADATA_REQUEST_ASYNC_RETRY_QUEUE_SIZE: ConfigEntry[Int] = + buildConf("kyuubi.metadata.request.async.retry.queue.size") + .withAlternative("kyuubi.metadata.request.retry.queue.size") .doc("The maximum queue size for buffering metadata requests in memory when the external" + - " metadata storage is down. Requests will be dropped if the queue exceeds.") + " metadata storage is down. Requests will be dropped if the queue exceeds. Only" + + s" take affect when ${METADATA_REQUEST_ASYNC_RETRY_ENABLED.key} is `true`.") .version("1.6.0") .intConf .createWithDefault(65536) @@ -1514,22 +1692,32 @@ object KyuubiConf { val OPERATION_QUERY_TIMEOUT: OptionalConfigEntry[Long] = buildConf("kyuubi.operation.query.timeout") - .doc("Timeout for query executions at server-side, take affect with client-side timeout(" + + .doc("Timeout for query executions at server-side, take effect with client-side timeout(" + "`java.sql.Statement.setQueryTimeout`) together, a running query will be cancelled" + - " automatically if timeout. It's off by default, which means only client-side take fully" + - " control whether the query should timeout or not. If set, client-side timeout capped at" + - " this point. To cancel the queries right away without waiting task to finish, consider" + - s" enabling ${OPERATION_FORCE_CANCEL.key} together.") + " automatically if timeout. It's off by default, which means only client-side take full" + + " control of whether the query should timeout or not." + + " If set, client-side timeout is capped at this point." + + " To cancel the queries right away without waiting for task to finish," + + s" consider enabling ${OPERATION_FORCE_CANCEL.key} together.") .version("1.2.0") .timeConf .checkValue(_ >= 1000, "must >= 1s if set") .createOptional + val OPERATION_RESULT_MAX_ROWS: ConfigEntry[Int] = + buildConf("kyuubi.operation.result.max.rows") + .doc("Max rows of Spark query results. Rows exceeding the limit would be ignored. " + + "By setting this value to 0 to disable the max rows limit.") + .version("1.6.0") + .intConf + .createWithDefault(0) + val OPERATION_INCREMENTAL_COLLECT: ConfigEntry[Boolean] = buildConf("kyuubi.operation.incremental.collect") .internal .doc("When true, the executor side result will be sequentially calculated and returned to" + - " the Spark driver side.") + s" the Spark driver side. Note that, ${OPERATION_RESULT_MAX_ROWS.key} will be ignored" + + " on incremental collect mode.") .version("1.4.0") .booleanConf .createWithDefault(false) @@ -1547,13 +1735,13 @@ object KyuubiConf { .transform(_.toLowerCase(Locale.ROOT)) .createWithDefault("thrift") - val OPERATION_RESULT_MAX_ROWS: ConfigEntry[Int] = - buildConf("kyuubi.operation.result.max.rows") - .doc("Max rows of Spark query results. Rows that exceeds the limit would be ignored. " + - "By setting this value to 0 to disable the max rows limit.") - .version("1.6.0") - .intConf - .createWithDefault(0) + val ARROW_BASED_ROWSET_TIMESTAMP_AS_STRING: ConfigEntry[Boolean] = + buildConf("kyuubi.operation.result.arrow.timestampAsString") + .doc("When true, arrow-based rowsets will convert columns of type timestamp to strings for" + + " transmission.") + .version("1.7.0") + .booleanConf + .createWithDefault(false) val SERVER_OPERATION_LOG_DIR_ROOT: ConfigEntry[String] = buildConf("kyuubi.operation.log.dir.root") @@ -1591,8 +1779,8 @@ object KyuubiConf { val ENGINE_SHARE_LEVEL_SUBDOMAIN: ConfigEntry[Option[String]] = buildConf("kyuubi.engine.share.level.subdomain") .doc("Allow end-users to create a subdomain for the share level of an engine. A" + - " subdomain is a case-insensitive string values that must be a valid zookeeper sub path." + - " For example, for `USER` share level, an end-user can share a certain engine within" + + " subdomain is a case-insensitive string values that must be a valid zookeeper subpath." + + " For example, for the `USER` share level, an end-user can share a certain engine within" + " a subdomain, not for all of its clients. End-users are free to create multiple" + " engines in the `USER` share level. When disable engine pool, use 'default' if absent.") .version("1.4.0") @@ -1602,7 +1790,7 @@ object KyuubiConf { val ENGINE_CONNECTION_URL_USE_HOSTNAME: ConfigEntry[Boolean] = buildConf("kyuubi.engine.connection.url.use.hostname") .doc("(deprecated) " + - "When true, engine register with hostname to zookeeper. When spark run on k8s" + + "When true, the engine registers with hostname to zookeeper. When Spark runs on K8s" + " with cluster mode, set to false to ensure that server can connect to engine") .version("1.3.0") .booleanConf @@ -1612,7 +1800,7 @@ object KyuubiConf { buildConf("kyuubi.frontend.connection.url.use.hostname") .doc("When true, frontend services prefer hostname, otherwise, ip address. Note that, " + "the default value is set to `false` when engine running on Kubernetes to prevent " + - "potential network issue.") + "potential network issues.") .version("1.5.0") .fallbackConf(ENGINE_CONNECTION_URL_USE_HOSTNAME) @@ -1622,10 +1810,11 @@ object KyuubiConf { " connection" + "
    • USER: engine will be shared by all sessions created by a unique username," + s" see also ${ENGINE_SHARE_LEVEL_SUBDOMAIN.key}
    • " + - "
    • GROUP: engine will be shared by all sessions created by all users belong to the same" + - " primary group name. The engine will be launched by the group name as the effective" + - " username, so here the group name is kind of special user who is able to visit the" + - " compute resources/data of a team. It follows the" + + "
    • GROUP: the engine will be shared by all sessions created" + + " by all users belong to the same primary group name." + + " The engine will be launched by the group name as the effective" + + " username, so here the group name is in value of special user who is able to visit the" + + " computing resources/data of the team. It follows the" + " [Hadoop GroupsMapping](https://reurl.cc/xE61Y5) to map user to a primary group. If the" + " primary group is not found, it fallback to the USER level." + "
    • SERVER: the App will be shared by Kyuubi servers
    ") @@ -1633,7 +1822,7 @@ object KyuubiConf { .fallbackConf(LEGACY_ENGINE_SHARE_LEVEL) val ENGINE_TYPE: ConfigEntry[String] = buildConf("kyuubi.engine.type") - .doc("Specify the detailed engine that supported by the Kyuubi. The engine type bindings to" + + .doc("Specify the detailed engine supported by Kyuubi. The engine type bindings to" + " SESSION scope. This configuration is experimental. Currently, available configs are:
      " + "
    • SPARK_SQL: specify this engine type will launch a Spark engine which can provide" + " all the capacity of the Apache Spark. Note, it's a default engine type.
    • " + @@ -1644,7 +1833,8 @@ object KyuubiConf { "
    • HIVE_SQL: specify this engine type will launch a Hive engine which can provide" + " all the capacity of the Hive Server2.
    • " + "
    • JDBC: specify this engine type will launch a JDBC engine which can provide" + - " a mysql protocol connector, for now we only support Doris dialect.
    • " + + " a MySQL protocol connector, for now we only support Doris dialect." + + "
    • CHAT: specify this engine type will launch a Chat engine.
    • " + "
    ") .version("1.4.0") .stringConf @@ -1662,22 +1852,22 @@ object KyuubiConf { .createWithDefault(false) val ENGINE_POOL_NAME: ConfigEntry[String] = buildConf("kyuubi.engine.pool.name") - .doc("The name of engine pool.") + .doc("The name of the engine pool.") .version("1.5.0") .stringConf .checkValue(validZookeeperSubPath.matcher(_).matches(), "must be valid zookeeper sub path.") .createWithDefault("engine-pool") val ENGINE_POOL_SIZE_THRESHOLD: ConfigEntry[Int] = buildConf("kyuubi.engine.pool.size.threshold") - .doc("This parameter is introduced as a server-side parameter, " + - "and controls the upper limit of the engine pool.") + .doc("This parameter is introduced as a server-side parameter " + + "controlling the upper limit of the engine pool.") .version("1.4.0") .intConf .checkValue(s => s > 0 && s < 33, "Invalid engine pool threshold, it should be in [1, 32]") .createWithDefault(9) val ENGINE_POOL_SIZE: ConfigEntry[Int] = buildConf("kyuubi.engine.pool.size") - .doc("The size of engine pool. Note that, " + + .doc("The size of the engine pool. Note that, " + "if the size is less than 1, the engine pool will not be enabled; " + "otherwise, the size of the engine pool will be " + s"min(this, ${ENGINE_POOL_SIZE_THRESHOLD.key}).") @@ -1685,7 +1875,7 @@ object KyuubiConf { .intConf .createWithDefault(-1) - val ENGINE_POOL_BALANCE_POLICY: ConfigEntry[String] = + val ENGINE_POOL_SELECT_POLICY: ConfigEntry[String] = buildConf("kyuubi.engine.pool.selectPolicy") .doc("The select policy of an engine from the corresponding engine pool engine for " + "a session.
      " + @@ -1720,7 +1910,7 @@ object KyuubiConf { val ENGINE_DEREGISTER_EXCEPTION_CLASSES: ConfigEntry[Seq[String]] = buildConf("kyuubi.engine.deregister.exception.classes") - .doc("A comma separated list of exception classes. If there is any exception thrown," + + .doc("A comma-separated list of exception classes. If there is any exception thrown," + " whose class matches the specified classes, the engine would deregister itself.") .version("1.2.0") .stringConf @@ -1729,7 +1919,7 @@ object KyuubiConf { val ENGINE_DEREGISTER_EXCEPTION_MESSAGES: ConfigEntry[Seq[String]] = buildConf("kyuubi.engine.deregister.exception.messages") - .doc("A comma separated list of exception messages. If there is any exception thrown," + + .doc("A comma-separated list of exception messages. If there is any exception thrown," + " whose message or stacktrace matches the specified message list, the engine would" + " deregister itself.") .version("1.2.0") @@ -1760,8 +1950,8 @@ object KyuubiConf { val OPERATION_SCHEDULER_POOL: OptionalConfigEntry[String] = buildConf("kyuubi.operation.scheduler.pool") - .doc("The scheduler pool of job. Note that, this config should be used after change Spark " + - "config spark.scheduler.mode=FAIR.") + .doc("The scheduler pool of job. Note that, this config should be used after changing " + + "Spark config spark.scheduler.mode=FAIR.") .version("1.1.1") .stringConf .createOptional @@ -1778,8 +1968,8 @@ object KyuubiConf { val ENGINE_USER_ISOLATED_SPARK_SESSION: ConfigEntry[Boolean] = buildConf("kyuubi.engine.user.isolated.spark.session") .doc("When set to false, if the engine is running in a group or server share level, " + - "all the JDBC/ODBC connections will be isolated against the user. Including: " + - "the temporary views, function registries, SQL configuration and the current database. " + + "all the JDBC/ODBC connections will be isolated against the user. Including " + + "the temporary views, function registries, SQL configuration, and the current database. " + "Note that, it does not affect if the share level is connection or user.") .version("1.6.0") .booleanConf @@ -1788,21 +1978,21 @@ object KyuubiConf { val ENGINE_USER_ISOLATED_SPARK_SESSION_IDLE_TIMEOUT: ConfigEntry[Long] = buildConf("kyuubi.engine.user.isolated.spark.session.idle.timeout") .doc(s"If ${ENGINE_USER_ISOLATED_SPARK_SESSION.key} is false, we will release the " + - s"spark session if its corresponding user is inactive after this configured timeout.") + s"Spark session if its corresponding user is inactive after this configured timeout.") .version("1.6.0") .timeConf .createWithDefault(Duration.ofHours(6).toMillis) val ENGINE_USER_ISOLATED_SPARK_SESSION_IDLE_INTERVAL: ConfigEntry[Long] = buildConf("kyuubi.engine.user.isolated.spark.session.idle.interval") - .doc(s"The interval to check if the user isolated spark session is timeout.") + .doc(s"The interval to check if the user-isolated Spark session is timeout.") .version("1.6.0") .timeConf .createWithDefault(Duration.ofMinutes(1).toMillis) val SERVER_EVENT_JSON_LOG_PATH: ConfigEntry[String] = buildConf("kyuubi.backend.server.event.json.log.path") - .doc("The location of server events go for the builtin JSON logger") + .doc("The location of server events go for the built-in JSON logger") .version("1.4.0") .serverOnly .stringConf @@ -1810,7 +2000,7 @@ object KyuubiConf { val ENGINE_EVENT_JSON_LOG_PATH: ConfigEntry[String] = buildConf("kyuubi.engine.event.json.log.path") - .doc("The location of all the engine events go for the builtin JSON logger.
        " + + .doc("The location where all the engine events go for the built-in JSON logger.
          " + "
        • Local Path: start with 'file://'
        • " + "
        • HDFS Path: start with 'hdfs://'
        ") .version("1.3.0") @@ -1819,7 +2009,7 @@ object KyuubiConf { val SERVER_EVENT_LOGGERS: ConfigEntry[Seq[String]] = buildConf("kyuubi.backend.server.event.loggers") - .doc("A comma separated list of server history loggers, where session/operation etc" + + .doc("A comma-separated list of server history loggers, where session/operation etc" + " events go.
          " + s"
        • JSON: the events will be written to the location of" + s" ${SERVER_EVENT_JSON_LOG_PATH.key}
        • " + @@ -1827,9 +2017,9 @@ object KyuubiConf { s"
        • CUSTOM: User-defined event handlers.
        " + " Note that: Kyuubi supports custom event handlers with the Java SPI." + " To register a custom event handler," + - " user need to implement a class" + + " the user needs to implement a class" + " which is a child of org.apache.kyuubi.events.handler.CustomEventHandlerProvider" + - " which has zero-arg constructor.") + " which has a zero-arg constructor.") .version("1.4.0") .serverOnly .stringConf @@ -1841,18 +2031,18 @@ object KyuubiConf { @deprecated("using kyuubi.engine.spark.event.loggers instead", "1.6.0") val ENGINE_EVENT_LOGGERS: ConfigEntry[Seq[String]] = buildConf("kyuubi.engine.event.loggers") - .doc("A comma separated list of engine history loggers, where engine/session/operation etc" + + .doc("A comma-separated list of engine history loggers, where engine/session/operation etc" + " events go.
          " + - "
        • SPARK: the events will be written to the spark listener bus.
        • " + + "
        • SPARK: the events will be written to the Spark listener bus.
        • " + "
        • JSON: the events will be written to the location of" + s" ${ENGINE_EVENT_JSON_LOG_PATH.key}
        • " + "
        • JDBC: to be done
        • " + "
        • CUSTOM: User-defined event handlers.
        " + " Note that: Kyuubi supports custom event handlers with the Java SPI." + " To register a custom event handler," + - " user need to implement a class" + - " which is a child of org.apache.kyuubi.events.handler.CustomEventHandlerProvider" + - " which has zero-arg constructor.") + " the user needs to implement a subclass" + + " of `org.apache.kyuubi.events.handler.CustomEventHandlerProvider`" + + " which has a zero-arg constructor.") .version("1.3.0") .stringConf .transform(_.toUpperCase(Locale.ROOT)) @@ -1914,11 +2104,26 @@ object KyuubiConf { buildConf("kyuubi.engine.security.secret.provider") .internal .doc("The class used to manage the internal security secret. This class must be a " + - "subclass of EngineSecuritySecretProvider.") + "subclass of `EngineSecuritySecretProvider`.") .version("1.5.0") .stringConf - .createWithDefault( - "org.apache.kyuubi.service.authentication.ZooKeeperEngineSecuritySecretProviderImpl") + .transform { + case "simple" => + "org.apache.kyuubi.service.authentication.SimpleEngineSecuritySecretProviderImpl" + case "zookeeper" => + "org.apache.kyuubi.service.authentication.ZooKeeperEngineSecuritySecretProviderImpl" + case other => other + } + .createWithDefault("zookeeper") + + val SIMPLE_SECURITY_SECRET_PROVIDER_PROVIDER_SECRET: OptionalConfigEntry[String] = + buildConf("kyuubi.engine.security.secret.provider.simple.secret") + .internal + .doc("The secret key used for internal security access. Only take affects when " + + s"${ENGINE_SECURITY_SECRET_PROVIDER.key} is 'simple'") + .version("1.7.0") + .stringConf + .createOptional val ENGINE_SECURITY_CRYPTO_KEY_LENGTH: ConfigEntry[Int] = buildConf("kyuubi.engine.security.crypto.keyLength") @@ -1956,8 +2161,8 @@ object KyuubiConf { val SESSION_NAME: OptionalConfigEntry[String] = buildConf("kyuubi.session.name") - .doc("A human readable name of session and we use empty string by default. " + - "This name will be recorded in event. Note that, we only apply this value from " + + .doc("A human readable name of the session and we use empty string by default. " + + "This name will be recorded in the event. Note that, we only apply this value from " + "session conf.") .version("1.4.0") .stringConf @@ -1991,8 +2196,9 @@ object KyuubiConf { val OPERATION_PLAN_ONLY_OUT_STYLE: ConfigEntry[String] = buildConf("kyuubi.operation.plan.only.output.style") - .doc("Configures the planOnly output style, The value can be 'plain' and 'json', default " + - "value is 'plain', this configuration supports only the output styles of the Spark engine") + .doc("Configures the planOnly output style. The value can be 'plain' or 'json', and " + + "the default value is 'plain'. This configuration supports only the output styles " + + "of the Spark engine") .version("1.7.0") .stringConf .transform(_.toUpperCase(Locale.ROOT)) @@ -2005,8 +2211,8 @@ object KyuubiConf { val OPERATION_PLAN_ONLY_EXCLUDES: ConfigEntry[Seq[String]] = buildConf("kyuubi.operation.plan.only.excludes") .doc("Comma-separated list of query plan names, in the form of simple class names, i.e, " + - "for `set abc=xyz`, the value will be `SetCommand`. For those auxiliary plans, such as " + - "`switch databases`, `set properties`, or `create temporary view` e.t.c, " + + "for `SET abc=xyz`, the value will be `SetCommand`. For those auxiliary plans, such as " + + "`switch databases`, `set properties`, or `create temporary view` etc., " + "which are used for setup evaluating environments for analyzing actual queries, " + "we can use this config to exclude them and let them take effect. " + s"See also ${OPERATION_PLAN_ONLY_MODE.key}.") @@ -2038,8 +2244,12 @@ object KyuubiConf { val OPERATION_LANGUAGE: ConfigEntry[String] = buildConf("kyuubi.operation.language") .doc("Choose a programing language for the following inputs" + - "
        • SQL: (Default) Run all following statements as SQL queries.
        • " + - "
        • SCALA: Run all following input a scala codes
        ") + "
          " + + "
        • SQL: (Default) Run all following statements as SQL queries.
        • " + + "
        • SCALA: Run all following input as scala codes
        • " + + "
        • PYTHON: (Experimental) Run all following input as Python codes with Spark engine" + + "
        • " + + "
        ") .version("1.5.0") .stringConf .transform(_.toUpperCase(Locale.ROOT)) @@ -2049,9 +2259,9 @@ object KyuubiConf { val SESSION_CONF_ADVISOR: OptionalConfigEntry[String] = buildConf("kyuubi.session.conf.advisor") .doc("A config advisor plugin for Kyuubi Server. This plugin can provide some custom " + - "configs for different user or session configs and overwrite the session configs before " + - "open a new session. This config value should be a class which is a child of " + - "'org.apache.kyuubi.plugin.SessionConfAdvisor' which has zero-arg constructor.") + "configs for different users or session configs and overwrite the session configs before " + + "opening a new session. This config value should be a subclass of " + + "`org.apache.kyuubi.plugin.SessionConfAdvisor` which has a zero-arg constructor.") .version("1.5.0") .stringConf .createOptional @@ -2059,9 +2269,9 @@ object KyuubiConf { val GROUP_PROVIDER: ConfigEntry[String] = buildConf("kyuubi.session.group.provider") .doc("A group provider plugin for Kyuubi Server. This plugin can provide primary group " + - "and groups information for different user or session configs. This config value " + - "should be a class which is a child of 'org.apache.kyuubi.plugin.GroupProvider' which " + - "has zero-arg constructor. Kyuubi provides the following built-in implementations: " + + "and groups information for different users or session configs. This config value " + + "should be a subclass of `org.apache.kyuubi.plugin.GroupProvider` which " + + "has a zero-arg constructor. Kyuubi provides the following built-in implementations: " + "
      • hadoop: delegate the user group mapping to hadoop UserGroupInformation.
      • ") .version("1.7.0") .stringConf @@ -2091,7 +2301,7 @@ object KyuubiConf { val ENGINE_SPARK_SHOW_PROGRESS: ConfigEntry[Boolean] = buildConf("kyuubi.session.engine.spark.showProgress") - .doc("When true, show the progress bar in the spark engine log.") + .doc("When true, show the progress bar in the Spark's engine log.") .version("1.6.0") .booleanConf .createWithDefault(false) @@ -2113,65 +2323,65 @@ object KyuubiConf { val ENGINE_TRINO_MEMORY: ConfigEntry[String] = buildConf("kyuubi.engine.trino.memory") - .doc("The heap memory for the trino query engine") + .doc("The heap memory for the Trino query engine") .version("1.6.0") .stringConf .createWithDefault("1g") val ENGINE_TRINO_JAVA_OPTIONS: OptionalConfigEntry[String] = buildConf("kyuubi.engine.trino.java.options") - .doc("The extra java options for the trino query engine") + .doc("The extra Java options for the Trino query engine") .version("1.6.0") .stringConf .createOptional val ENGINE_TRINO_EXTRA_CLASSPATH: OptionalConfigEntry[String] = buildConf("kyuubi.engine.trino.extra.classpath") - .doc("The extra classpath for the trino query engine, " + - "for configuring other libs which may need by the trino engine ") + .doc("The extra classpath for the Trino query engine, " + + "for configuring other libs which may need by the Trino engine ") .version("1.6.0") .stringConf .createOptional val ENGINE_HIVE_MEMORY: ConfigEntry[String] = buildConf("kyuubi.engine.hive.memory") - .doc("The heap memory for the hive query engine") + .doc("The heap memory for the Hive query engine") .version("1.6.0") .stringConf .createWithDefault("1g") val ENGINE_HIVE_JAVA_OPTIONS: OptionalConfigEntry[String] = buildConf("kyuubi.engine.hive.java.options") - .doc("The extra java options for the hive query engine") + .doc("The extra Java options for the Hive query engine") .version("1.6.0") .stringConf .createOptional val ENGINE_HIVE_EXTRA_CLASSPATH: OptionalConfigEntry[String] = buildConf("kyuubi.engine.hive.extra.classpath") - .doc("The extra classpath for the hive query engine, for configuring location" + - " of hadoop client jars, etc") + .doc("The extra classpath for the Hive query engine, for configuring location" + + " of the hadoop client jars and etc.") .version("1.6.0") .stringConf .createOptional val ENGINE_FLINK_MEMORY: ConfigEntry[String] = buildConf("kyuubi.engine.flink.memory") - .doc("The heap memory for the flink sql engine") + .doc("The heap memory for the Flink SQL engine") .version("1.6.0") .stringConf .createWithDefault("1g") val ENGINE_FLINK_JAVA_OPTIONS: OptionalConfigEntry[String] = buildConf("kyuubi.engine.flink.java.options") - .doc("The extra java options for the flink sql engine") + .doc("The extra Java options for the Flink SQL engine") .version("1.6.0") .stringConf .createOptional val ENGINE_FLINK_EXTRA_CLASSPATH: OptionalConfigEntry[String] = buildConf("kyuubi.engine.flink.extra.classpath") - .doc("The extra classpath for the flink sql engine, for configuring location" + + .doc("The extra classpath for the Flink SQL engine, for configuring the location" + " of hadoop client jars, etc") .version("1.6.0") .stringConf @@ -2206,7 +2416,7 @@ object KyuubiConf { val SERVER_LIMIT_CONNECTIONS_USER_UNLIMITED_LIST: ConfigEntry[Seq[String]] = buildConf("kyuubi.server.limit.connections.user.unlimited.list") - .doc("The maximin connections of the user in the white list will not be limited.") + .doc("The maximum connections of the user in the white list will not be limited.") .version("1.7.0") .serverOnly .stringConf @@ -2214,7 +2424,7 @@ object KyuubiConf { .createWithDefault(Nil) val SERVER_LIMIT_BATCH_CONNECTIONS_PER_USER: OptionalConfigEntry[Int] = - buildConf("kyuubi.server.batch.limit.connections.per.user") + buildConf("kyuubi.server.limit.batch.connections.per.user") .doc("Maximum kyuubi server batch connections per user." + " Any user exceeding this limit will not be allowed to connect.") .version("1.7.0") @@ -2223,7 +2433,7 @@ object KyuubiConf { .createOptional val SERVER_LIMIT_BATCH_CONNECTIONS_PER_IPADDRESS: OptionalConfigEntry[Int] = - buildConf("kyuubi.server.batch.limit.connections.per.ipaddress") + buildConf("kyuubi.server.limit.batch.connections.per.ipaddress") .doc("Maximum kyuubi server batch connections per ipaddress." + " Any user exceeding this limit will not be allowed to connect.") .version("1.7.0") @@ -2232,7 +2442,7 @@ object KyuubiConf { .createOptional val SERVER_LIMIT_BATCH_CONNECTIONS_PER_USER_IPADDRESS: OptionalConfigEntry[Int] = - buildConf("kyuubi.server.batch.limit.connections.per.user.ipaddress") + buildConf("kyuubi.server.limit.batch.connections.per.user.ipaddress") .doc("Maximum kyuubi server batch connections per user:ipaddress combination." + " Any user-ipaddress exceeding this limit will not be allowed to connect.") .version("1.7.0") @@ -2240,6 +2450,15 @@ object KyuubiConf { .intConf .createOptional + val SERVER_LIMIT_CLIENT_FETCH_MAX_ROWS: OptionalConfigEntry[Int] = + buildConf("kyuubi.server.limit.client.fetch.max.rows") + .doc("Max rows limit for getting result row set operation. If the max rows specified " + + "by client-side is larger than the limit, request will fail directly.") + .version("1.8.0") + .serverOnly + .intConf + .createOptional + val SESSION_PROGRESS_ENABLE: ConfigEntry[Boolean] = buildConf("kyuubi.operation.progress.enabled") .doc("Whether to enable the operation progress. When true," + @@ -2256,17 +2475,35 @@ object KyuubiConf { .regexConf .createOptional + val SERVER_PERIODIC_GC_INTERVAL: ConfigEntry[Long] = + buildConf("kyuubi.server.periodicGC.interval") + .doc("How often to trigger a garbage collection.") + .version("1.7.0") + .serverOnly + .timeConf + .createWithDefaultString("PT30M") + + val SERVER_ADMINISTRATORS: ConfigEntry[Seq[String]] = + buildConf("kyuubi.server.administrators") + .doc("Comma-separated list of Kyuubi service administrators. " + + "We use this config to grant admin permission to any service accounts.") + .version("1.8.0") + .serverOnly + .stringConf + .toSequence() + .createWithDefault(Nil) + val OPERATION_SPARK_LISTENER_ENABLED: ConfigEntry[Boolean] = buildConf("kyuubi.operation.spark.listener.enabled") - .doc("When set to true, Spark engine registers a SQLOperationListener before executing " + - "the statement, logs a few summary statistics when each stage completes.") + .doc("When set to true, Spark engine registers an SQLOperationListener before executing " + + "the statement, logging a few summary statistics when each stage completes.") .version("1.6.0") .booleanConf .createWithDefault(true) val ENGINE_JDBC_DRIVER_CLASS: OptionalConfigEntry[String] = buildConf("kyuubi.engine.jdbc.driver.class") - .doc("The driver class for jdbc engine connection") + .doc("The driver class for JDBC engine connection") .version("1.6.0") .stringConf .createOptional @@ -2302,14 +2539,14 @@ object KyuubiConf { val ENGINE_JDBC_CONNECTION_PROVIDER: OptionalConfigEntry[String] = buildConf("kyuubi.engine.jdbc.connection.provider") - .doc("The connection provider is used for getting a connection from server") + .doc("The connection provider is used for getting a connection from the server") .version("1.6.0") .stringConf .createOptional val ENGINE_JDBC_SHORT_NAME: OptionalConfigEntry[String] = buildConf("kyuubi.engine.jdbc.type") - .doc("The short name of jdbc type") + .doc("The short name of JDBC type") .version("1.6.0") .stringConf .createOptional @@ -2322,6 +2559,15 @@ object KyuubiConf { .booleanConf .createWithDefault(true) + val ENGINE_SUBMIT_TIMEOUT: ConfigEntry[Long] = + buildConf("kyuubi.engine.submit.timeout") + .doc("Period to tolerant Driver Pod ephemerally invisible after submitting. " + + "In some Resource Managers, e.g. K8s, the Driver Pod is not visible immediately " + + "after `spark-submit` is returned.") + .version("1.7.1") + .timeConf + .createWithDefaultString("PT30S") + /** * Holds information about keys that have been deprecated. * @@ -2393,33 +2639,111 @@ object KyuubiConf { Map(configs.map { cfg => cfg.key -> cfg }: _*) } + val ENGINE_CHAT_MEMORY: ConfigEntry[String] = + buildConf("kyuubi.engine.chat.memory") + .doc("The heap memory for the Chat engine") + .version("1.8.0") + .stringConf + .createWithDefault("1g") + + val ENGINE_CHAT_JAVA_OPTIONS: OptionalConfigEntry[String] = + buildConf("kyuubi.engine.chat.java.options") + .doc("The extra Java options for the Chat engine") + .version("1.8.0") + .stringConf + .createOptional + + val ENGINE_CHAT_PROVIDER: ConfigEntry[String] = + buildConf("kyuubi.engine.chat.provider") + .doc("The provider for the Chat engine. Candidates:
          " + + "
        • ECHO: simply replies a welcome message.
        • " + + "
        • GPT: a.k.a ChatGPT, powered by OpenAI.
        • " + + "
        ") + .version("1.8.0") + .stringConf + .transform { + case "ECHO" | "echo" => "org.apache.kyuubi.engine.chat.provider.EchoProvider" + case "GPT" | "gpt" | "ChatGPT" => "org.apache.kyuubi.engine.chat.provider.ChatGPTProvider" + case other => other + } + .createWithDefault("ECHO") + + val ENGINE_CHAT_GPT_API_KEY: OptionalConfigEntry[String] = + buildConf("kyuubi.engine.chat.gpt.apiKey") + .doc("The key to access OpenAI open API, which could be got at " + + "https://platform.openai.com/account/api-keys") + .version("1.8.0") + .stringConf + .createOptional + + val ENGINE_CHAT_GPT_MODEL: ConfigEntry[String] = + buildConf("kyuubi.engine.chat.gpt.model") + .doc("ID of the model used in ChatGPT. Available models refer to OpenAI's " + + "[Model overview](https://platform.openai.com/docs/models/overview).") + .version("1.8.0") + .stringConf + .createWithDefault("gpt-3.5-turbo") + + val ENGINE_CHAT_EXTRA_CLASSPATH: OptionalConfigEntry[String] = + buildConf("kyuubi.engine.chat.extra.classpath") + .doc("The extra classpath for the Chat engine, for configuring the location " + + "of the SDK and etc.") + .version("1.8.0") + .stringConf + .createOptional + + val ENGINE_CHAT_GPT_HTTP_PROXY: OptionalConfigEntry[String] = + buildConf("kyuubi.engine.chat.gpt.http.proxy") + .doc("HTTP proxy url for API calling in Chat GPT engine. e.g. http://127.0.0.1:1087") + .version("1.8.0") + .stringConf + .createOptional + + val ENGINE_CHAT_GPT_HTTP_CONNECT_TIMEOUT: ConfigEntry[Long] = + buildConf("kyuubi.engine.chat.gpt.http.connect.timeout") + .doc("The timeout[ms] for establishing the connection with the Chat GPT server. " + + "A timeout value of zero is interpreted as an infinite timeout.") + .version("1.8.0") + .timeConf + .checkValue(_ >= 0, "must be 0 or positive number") + .createWithDefault(Duration.ofSeconds(120).toMillis) + + val ENGINE_CHAT_GPT_HTTP_SOCKET_TIMEOUT: ConfigEntry[Long] = + buildConf("kyuubi.engine.chat.gpt.http.socket.timeout") + .doc("The timeout[ms] for waiting for data packets after Chat GPT server " + + "connection is established. A timeout value of zero is interpreted as an infinite timeout.") + .version("1.8.0") + .timeConf + .checkValue(_ >= 0, "must be 0 or positive number") + .createWithDefault(Duration.ofSeconds(120).toMillis) + val ENGINE_JDBC_MEMORY: ConfigEntry[String] = buildConf("kyuubi.engine.jdbc.memory") - .doc("The heap memory for the jdbc query engine") + .doc("The heap memory for the JDBC query engine") .version("1.6.0") .stringConf .createWithDefault("1g") val ENGINE_JDBC_JAVA_OPTIONS: OptionalConfigEntry[String] = buildConf("kyuubi.engine.jdbc.java.options") - .doc("The extra java options for the jdbc query engine") + .doc("The extra Java options for the JDBC query engine") .version("1.6.0") .stringConf .createOptional val ENGINE_JDBC_EXTRA_CLASSPATH: OptionalConfigEntry[String] = buildConf("kyuubi.engine.jdbc.extra.classpath") - .doc("The extra classpath for the jdbc query engine, for configuring location" + - " of jdbc driver, etc") + .doc("The extra classpath for the JDBC query engine, for configuring the location" + + " of the JDBC driver and etc.") .version("1.6.0") .stringConf .createOptional val ENGINE_SPARK_EVENT_LOGGERS: ConfigEntry[Seq[String]] = buildConf("kyuubi.engine.spark.event.loggers") - .doc("A comma separated list of engine loggers, where engine/session/operation etc" + + .doc("A comma-separated list of engine loggers, where engine/session/operation etc" + " events go.
          " + - "
        • SPARK: the events will be written to the spark listener bus.
        • " + + "
        • SPARK: the events will be written to the Spark listener bus.
        • " + "
        • JSON: the events will be written to the location of" + s" ${ENGINE_EVENT_JSON_LOG_PATH.key}
        • " + "
        • JDBC: to be done
        • " + @@ -2430,28 +2754,37 @@ object KyuubiConf { val ENGINE_SPARK_PYTHON_HOME_ARCHIVE: OptionalConfigEntry[String] = buildConf("kyuubi.engine.spark.python.home.archive") .doc("Spark archive containing $SPARK_HOME/python directory, which is used to init session" + - " python worker for python language mode.") + " Python worker for Python language mode.") .version("1.7.0") .stringConf .createOptional val ENGINE_SPARK_PYTHON_ENV_ARCHIVE: OptionalConfigEntry[String] = buildConf("kyuubi.engine.spark.python.env.archive") - .doc("Portable python env archive used for Spark engine python language mode.") + .doc("Portable Python env archive used for Spark engine Python language mode.") .version("1.7.0") .stringConf .createOptional val ENGINE_SPARK_PYTHON_ENV_ARCHIVE_EXEC_PATH: ConfigEntry[String] = buildConf("kyuubi.engine.spark.python.env.archive.exec.path") - .doc("The python exec path under the python env archive.") + .doc("The Python exec path under the Python env archive.") .version("1.7.0") .stringConf .createWithDefault("bin/python") + val ENGINE_SPARK_REGISTER_ATTRIBUTES: ConfigEntry[Seq[String]] = + buildConf("kyuubi.engine.spark.register.attributes") + .internal + .doc("The extra attributes to expose when registering for Spark engine.") + .version("1.8.0") + .stringConf + .toSequence() + .createWithDefault(Seq("spark.driver.memory", "spark.executor.memory")) + val ENGINE_HIVE_EVENT_LOGGERS: ConfigEntry[Seq[String]] = buildConf("kyuubi.engine.hive.event.loggers") - .doc("A comma separated list of engine history loggers, where engine/session/operation etc" + + .doc("A comma-separated list of engine history loggers, where engine/session/operation etc" + " events go.
            " + "
          • JSON: the events will be written to the location of" + s" ${ENGINE_EVENT_JSON_LOG_PATH.key}
          • " + @@ -2468,7 +2801,7 @@ object KyuubiConf { val ENGINE_TRINO_EVENT_LOGGERS: ConfigEntry[Seq[String]] = buildConf("kyuubi.engine.trino.event.loggers") - .doc("A comma separated list of engine history loggers, where engine/session/operation etc" + + .doc("A comma-separated list of engine history loggers, where engine/session/operation etc" + " events go.
              " + "
            • JSON: the events will be written to the location of" + s" ${ENGINE_EVENT_JSON_LOG_PATH.key}
            • " + @@ -2504,4 +2837,11 @@ object KyuubiConf { .version("1.7.0") .timeConf .createWithDefault(Duration.ofSeconds(60).toMillis) + + val OPERATION_GET_TABLES_IGNORE_TABLE_PROPERTIES: ConfigEntry[Boolean] = + buildConf("kyuubi.operation.getTables.ignoreTableProperties") + .doc("Speed up the `GetTables` operation by returning table identities only.") + .version("1.8.0") + .booleanConf + .createWithDefault(false) } diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/config/KyuubiReservedKeys.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/config/KyuubiReservedKeys.scala index 50dae6275c5..8b42e659f82 100644 --- a/kyuubi-common/src/main/scala/org/apache/kyuubi/config/KyuubiReservedKeys.scala +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/config/KyuubiReservedKeys.scala @@ -19,18 +19,21 @@ package org.apache.kyuubi.config object KyuubiReservedKeys { final val KYUUBI_CLIENT_IP_KEY = "kyuubi.client.ipAddress" + final val KYUUBI_CLIENT_VERSION_KEY = "kyuubi.client.version" final val KYUUBI_SERVER_IP_KEY = "kyuubi.server.ipAddress" final val KYUUBI_SESSION_USER_KEY = "kyuubi.session.user" final val KYUUBI_SESSION_SIGN_PUBLICKEY = "kyuubi.session.sign.publickey" final val KYUUBI_SESSION_USER_SIGN = "kyuubi.session.user.sign" final val KYUUBI_SESSION_REAL_USER_KEY = "kyuubi.session.real.user" final val KYUUBI_SESSION_CONNECTION_URL_KEY = "kyuubi.session.connection.url" + final val KYUUBI_BATCH_RESOURCE_UPLOADED_KEY = "kyuubi.batch.resource.uploaded" final val KYUUBI_STATEMENT_ID_KEY = "kyuubi.statement.id" final val KYUUBI_ENGINE_ID = "kyuubi.engine.id" final val KYUUBI_ENGINE_NAME = "kyuubi.engine.name" final val KYUUBI_ENGINE_URL = "kyuubi.engine.url" final val KYUUBI_ENGINE_SUBMIT_TIME_KEY = "kyuubi.engine.submit.time" final val KYUUBI_ENGINE_CREDENTIALS_KEY = "kyuubi.engine.credentials" + final val KYUUBI_SESSION_HANDLE_KEY = "kyuubi.session.handle" final val KYUUBI_SESSION_ENGINE_LAUNCH_HANDLE_GUID = "kyuubi.session.engine.launch.handle.guid" final val KYUUBI_SESSION_ENGINE_LAUNCH_HANDLE_SECRET = @@ -39,4 +42,5 @@ object KyuubiReservedKeys { final val KYUUBI_OPERATION_GET_CURRENT_CATALOG = "kyuubi.operation.get.current.catalog" final val KYUUBI_OPERATION_SET_CURRENT_DATABASE = "kyuubi.operation.set.current.database" final val KYUUBI_OPERATION_GET_CURRENT_DATABASE = "kyuubi.operation.get.current.database" + final val KYUUBI_OPERATION_HANDLE_KEY = "kyuubi.operation.handle" } diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/engine/EngineType.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/engine/EngineType.scala index 88680a8c757..3d850ba14f5 100644 --- a/kyuubi-common/src/main/scala/org/apache/kyuubi/engine/EngineType.scala +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/engine/EngineType.scala @@ -23,5 +23,5 @@ package org.apache.kyuubi.engine object EngineType extends Enumeration { type EngineType = Value - val SPARK_SQL, FLINK_SQL, TRINO, HIVE_SQL, JDBC = Value + val SPARK_SQL, FLINK_SQL, CHAT, TRINO, HIVE_SQL, JDBC = Value } diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/operation/AbstractOperation.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/operation/AbstractOperation.scala index 9cdd6a8f0c9..d50cb8e243f 100644 --- a/kyuubi-common/src/main/scala/org/apache/kyuubi/operation/AbstractOperation.scala +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/operation/AbstractOperation.scala @@ -36,7 +36,7 @@ abstract class AbstractOperation(session: Session) extends Operation with Loggin final protected val opType: String = getClass.getSimpleName final protected val createTime = System.currentTimeMillis() - final private val handle = OperationHandle() + protected val handle = OperationHandle() final private val operationTimeout: Long = { session.sessionManager.getConf.get(OPERATION_IDLE_TIMEOUT) } diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/operation/OperationManager.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/operation/OperationManager.scala index fe38263db64..df45e6dee01 100644 --- a/kyuubi-common/src/main/scala/org/apache/kyuubi/operation/OperationManager.scala +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/operation/OperationManager.scala @@ -17,6 +17,8 @@ package org.apache.kyuubi.operation +import scala.collection.JavaConverters._ + import org.apache.hive.service.rpc.thrift._ import org.apache.kyuubi.KyuubiSQLException @@ -41,6 +43,8 @@ abstract class OperationManager(name: String) extends AbstractService(name) { def getOperationCount: Int = handleToOperation.size() + def allOperations(): Iterable[Operation] = handleToOperation.values().asScala + override def initialize(conf: KyuubiConf): Unit = { LogDivertAppender.initialize(skipOperationLog) super.initialize(conf) diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/operation/log/Log4j12DivertAppender.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/operation/log/Log4j12DivertAppender.scala index 1191e94ae29..df2ef93d83b 100644 --- a/kyuubi-common/src/main/scala/org/apache/kyuubi/operation/log/Log4j12DivertAppender.scala +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/operation/log/Log4j12DivertAppender.scala @@ -39,7 +39,7 @@ class Log4j12DivertAppender extends WriterAppender { setLayout(lo) addFilter { _: LoggingEvent => - if (OperationLog.getCurrentOperationLog == null) Filter.DENY else Filter.NEUTRAL + if (OperationLog.getCurrentOperationLog.isDefined) Filter.NEUTRAL else Filter.DENY } /** @@ -51,8 +51,7 @@ class Log4j12DivertAppender extends WriterAppender { // That should've gone into our writer. Notify the LogContext. val logOutput = writer.toString writer.reset() - val log = OperationLog.getCurrentOperationLog - if (log != null) log.write(logOutput) + OperationLog.getCurrentOperationLog.foreach(_.write(logOutput)) } } diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/operation/log/Log4j2DivertAppender.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/operation/log/Log4j2DivertAppender.scala index 68753cf9865..dc4b24a8ca6 100644 --- a/kyuubi-common/src/main/scala/org/apache/kyuubi/operation/log/Log4j2DivertAppender.scala +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/operation/log/Log4j2DivertAppender.scala @@ -18,6 +18,7 @@ package org.apache.kyuubi.operation.log import java.io.CharArrayWriter +import java.util.concurrent.locks.ReadWriteLock import scala.collection.JavaConverters._ @@ -27,6 +28,8 @@ import org.apache.logging.log4j.core.appender.{AbstractWriterAppender, ConsoleAp import org.apache.logging.log4j.core.filter.AbstractFilter import org.apache.logging.log4j.core.layout.PatternLayout +import org.apache.kyuubi.reflection.DynFields + class Log4j2DivertAppender( name: String, layout: StringLayout, @@ -52,22 +55,19 @@ class Log4j2DivertAppender( addFilter(new AbstractFilter() { override def filter(event: LogEvent): Filter.Result = { - if (OperationLog.getCurrentOperationLog == null) { - Filter.Result.DENY - } else { + if (OperationLog.getCurrentOperationLog.isDefined) { Filter.Result.NEUTRAL + } else { + Filter.Result.DENY } } }) - def initLayout(): StringLayout = { - LogManager.getRootLogger.asInstanceOf[org.apache.logging.log4j.core.Logger] - .getAppenders.values().asScala - .find(ap => ap.isInstanceOf[ConsoleAppender] && ap.getLayout.isInstanceOf[StringLayout]) - .map(_.getLayout.asInstanceOf[StringLayout]) - .getOrElse(PatternLayout.newBuilder().withPattern( - "%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n").build()) - } + private val writeLock = DynFields.builder() + .hiddenImpl(classOf[AbstractWriterAppender[_]], "readWriteLock") + .build[ReadWriteLock](this) + .get() + .writeLock /** * Overrides AbstractWriterAppender.append(), which does the real logging. No need @@ -75,11 +75,15 @@ class Log4j2DivertAppender( */ override def append(event: LogEvent): Unit = { super.append(event) - // That should've gone into our writer. Notify the LogContext. - val logOutput = writer.toString - writer.reset() - val log = OperationLog.getCurrentOperationLog - if (log != null) log.write(logOutput) + writeLock.lock() + try { + // That should've gone into our writer. Notify the LogContext. + val logOutput = writer.toString + writer.reset() + OperationLog.getCurrentOperationLog.foreach(_.write(logOutput)) + } finally { + writeLock.unlock() + } } } @@ -95,7 +99,7 @@ object Log4j2DivertAppender { def initialize(): Unit = { val ap = new Log4j2DivertAppender() - org.apache.logging.log4j.LogManager.getRootLogger() + org.apache.logging.log4j.LogManager.getRootLogger .asInstanceOf[org.apache.logging.log4j.core.Logger].addAppender(ap) ap.start() } diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/operation/log/OperationLog.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/operation/log/OperationLog.scala index 84c4ed55c0f..e6312d0fb84 100644 --- a/kyuubi-common/src/main/scala/org/apache/kyuubi/operation/log/OperationLog.scala +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/operation/log/OperationLog.scala @@ -44,7 +44,7 @@ object OperationLog extends Logging { OPERATION_LOG.set(operationLog) } - def getCurrentOperationLog: OperationLog = OPERATION_LOG.get() + def getCurrentOperationLog: Option[OperationLog] = Option(OPERATION_LOG.get) def removeCurrentOperationLog(): Unit = OPERATION_LOG.remove() diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/package.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/package.scala index 11871c5d046..e05ad9fbe73 100644 --- a/kyuubi-common/src/main/scala/org/apache/kyuubi/package.scala +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/package.scala @@ -53,6 +53,7 @@ package object kyuubi { val trino_version: String = props.getProperty("kyuubi_trino_version", unknown) val branch: String = props.getProperty("branch", unknown) val revision: String = props.getProperty("revision", unknown) + val revisionTime: String = props.getProperty("revision_time", unknown) val user: String = props.getProperty("user", unknown) val repoUrl: String = props.getProperty("url", unknown) val buildDate: String = props.getProperty("date", unknown) @@ -68,6 +69,7 @@ package object kyuubi { val TRINO_COMPILE_VERSION: String = BuildInfo.trino_version val BRANCH: String = BuildInfo.branch val REVISION: String = BuildInfo.revision + val REVISION_TIME: String = BuildInfo.revisionTime val BUILD_USER: String = BuildInfo.user val REPO_URL: String = BuildInfo.repoUrl val BUILD_DATE: String = BuildInfo.buildDate diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/service/AbstractBackendService.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/service/AbstractBackendService.scala index e7c2d836573..171e0490137 100644 --- a/kyuubi-common/src/main/scala/org/apache/kyuubi/service/AbstractBackendService.scala +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/service/AbstractBackendService.scala @@ -21,7 +21,7 @@ import java.util.concurrent.{ExecutionException, TimeoutException, TimeUnit} import scala.concurrent.CancellationException -import org.apache.hive.service.rpc.thrift.{TGetInfoType, TGetInfoValue, TGetResultSetMetadataResp, TProtocolVersion, TRowSet} +import org.apache.hive.service.rpc.thrift._ import org.apache.kyuubi.config.KyuubiConf import org.apache.kyuubi.operation.{OperationHandle, OperationStatus} @@ -35,6 +35,7 @@ abstract class AbstractBackendService(name: String) extends CompositeService(name) with BackendService { private lazy val timeout = conf.get(KyuubiConf.OPERATION_STATUS_POLLING_TIMEOUT) + private lazy val maxRowsLimit = conf.get(KyuubiConf.SERVER_LIMIT_CLIENT_FETCH_MAX_ROWS) override def openSession( protocol: TProtocolVersion, @@ -156,11 +157,14 @@ abstract class AbstractBackendService(name: String) queryId } - override def getOperationStatus(operationHandle: OperationHandle): OperationStatus = { + override def getOperationStatus( + operationHandle: OperationHandle, + maxWait: Option[Long]): OperationStatus = { val operation = sessionManager.operationManager.getOperation(operationHandle) if (operation.shouldRunAsync) { try { - operation.getBackgroundHandle.get(timeout, TimeUnit.MILLISECONDS) + val waitTime = maxWait.getOrElse(timeout) + operation.getBackgroundHandle.get(waitTime, TimeUnit.MILLISECONDS) } catch { case e: TimeoutException => debug(s"$operationHandle: Long polling timed out, ${e.getMessage}") @@ -198,6 +202,12 @@ abstract class AbstractBackendService(name: String) orientation: FetchOrientation, maxRows: Int, fetchLog: Boolean): TRowSet = { + maxRowsLimit.foreach(limit => + if (maxRows > limit) { + throw new IllegalArgumentException(s"Max rows for fetching results " + + s"operation should not exceed the limit: $limit") + }) + sessionManager.operationManager .getOperation(operationHandle) .getSession diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/service/BackendService.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/service/BackendService.scala index e1841156664..968a94197d2 100644 --- a/kyuubi-common/src/main/scala/org/apache/kyuubi/service/BackendService.scala +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/service/BackendService.scala @@ -91,7 +91,9 @@ trait BackendService { foreignTable: String): OperationHandle def getQueryId(operationHandle: OperationHandle): String - def getOperationStatus(operationHandle: OperationHandle): OperationStatus + def getOperationStatus( + operationHandle: OperationHandle, + maxWait: Option[Long] = None): OperationStatus def cancelOperation(operationHandle: OperationHandle): Unit def closeOperation(operationHandle: OperationHandle): Unit def getResultSetMetadata(operationHandle: OperationHandle): TGetResultSetMetadataResp diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/service/ServiceUtils.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/service/ServiceUtils.scala index d481aea77ab..955144af847 100644 --- a/kyuubi-common/src/main/scala/org/apache/kyuubi/service/ServiceUtils.scala +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/service/ServiceUtils.scala @@ -17,6 +17,10 @@ package org.apache.kyuubi.service +import java.io.{Closeable, IOException} + +import org.slf4j.Logger + object ServiceUtils { /** @@ -49,4 +53,24 @@ object ServiceUtils { userName.substring(0, indexOfDomainMatch) } } + + /** + * Close the Closeable objects and ignore any [[IOException]] or + * null pointers. Must only be used for cleanup in exception handlers. + * + * @param log the log to record problems to at debug level. Can be null. + * @param closeables the objects to close + */ + def cleanup(log: Logger, closeables: Closeable*): Unit = { + closeables.filter(_ != null).foreach { c => + try { + c.close() + } catch { + case e: IOException => + if (log != null && log.isDebugEnabled) { + log.debug(s"Exception in closing $c", e) + } + } + } + } } diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/service/TBinaryFrontendService.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/service/TBinaryFrontendService.scala index 74cf4e2e6ef..2e8a8b765e2 100644 --- a/kyuubi-common/src/main/scala/org/apache/kyuubi/service/TBinaryFrontendService.scala +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/service/TBinaryFrontendService.scala @@ -163,7 +163,7 @@ abstract class TBinaryFrontendService(name: String) } } sslServerSocket.setEnabledProtocols(enabledProtocols) - info(s"SSL Server Socket enabled protocols: $enabledProtocols") + info(s"SSL Server Socket enabled protocols: ${enabledProtocols.mkString(",")}") case _ => } diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/service/TFrontendService.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/service/TFrontendService.scala index 4efc617868c..e541c37c015 100644 --- a/kyuubi-common/src/main/scala/org/apache/kyuubi/service/TFrontendService.scala +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/service/TFrontendService.scala @@ -31,7 +31,7 @@ import org.apache.thrift.transport.TTransport import org.apache.kyuubi.{KyuubiSQLException, Logging, Utils} import org.apache.kyuubi.Utils.stringifyException -import org.apache.kyuubi.config.KyuubiConf.FRONTEND_CONNECTION_URL_USE_HOSTNAME +import org.apache.kyuubi.config.KyuubiConf.{FRONTEND_CONNECTION_URL_USE_HOSTNAME, SESSION_CLOSE_ON_DISCONNECT} import org.apache.kyuubi.config.KyuubiReservedKeys._ import org.apache.kyuubi.operation.{FetchOrientation, OperationHandle} import org.apache.kyuubi.service.authentication.KyuubiAuthenticationFactory @@ -228,7 +228,7 @@ abstract class TFrontendService(name: String) resp.setStatus(OK_STATUS) } catch { case e: Exception => - error("Error getting type info: ", e) + error("Error getting info: ", e) resp.setInfoValue(TGetInfoValue.lenValue(0)) resp.setStatus(KyuubiSQLException.toTStatus(e)) } @@ -608,7 +608,14 @@ abstract class TFrontendService(name: String) if (handle != null) { info(s"Session [$handle] disconnected without closing properly, close it now") try { - be.closeSession(handle) + val needToClose = be.sessionManager.getSession(handle).conf + .get(SESSION_CLOSE_ON_DISCONNECT.key).getOrElse("true").toBoolean + if (needToClose) { + be.closeSession(handle) + } else { + warn(s"Session not actually closed because configuration " + + s"${SESSION_CLOSE_ON_DISCONNECT.key} is set to false") + } } catch { case e: KyuubiSQLException => error("Failed closing session", e) diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/service/authentication/EngineSecuritySecretProvider.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/service/authentication/EngineSecuritySecretProvider.scala index 5bd9e4092eb..2bcfe9a676b 100644 --- a/kyuubi-common/src/main/scala/org/apache/kyuubi/service/authentication/EngineSecuritySecretProvider.scala +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/service/authentication/EngineSecuritySecretProvider.scala @@ -18,7 +18,7 @@ package org.apache.kyuubi.service.authentication import org.apache.kyuubi.config.KyuubiConf -import org.apache.kyuubi.config.KyuubiConf.ENGINE_SECURITY_SECRET_PROVIDER +import org.apache.kyuubi.config.KyuubiConf._ trait EngineSecuritySecretProvider { @@ -33,6 +33,21 @@ trait EngineSecuritySecretProvider { def getSecret(): String } +class SimpleEngineSecuritySecretProviderImpl extends EngineSecuritySecretProvider { + + private var _conf: KyuubiConf = _ + + override def initialize(conf: KyuubiConf): Unit = _conf = conf + + override def getSecret(): String = { + _conf.get(SIMPLE_SECURITY_SECRET_PROVIDER_PROVIDER_SECRET).getOrElse { + throw new IllegalArgumentException( + s"${SIMPLE_SECURITY_SECRET_PROVIDER_PROVIDER_SECRET.key} must be configured " + + s"when ${ENGINE_SECURITY_SECRET_PROVIDER.key} is `simple`.") + } + } +} + object EngineSecuritySecretProvider { def create(conf: KyuubiConf): EngineSecuritySecretProvider = { val providerClass = Class.forName(conf.get(ENGINE_SECURITY_SECRET_PROVIDER)) diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/service/authentication/LdapAuthenticationProviderImpl.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/service/authentication/LdapAuthenticationProviderImpl.scala index b5e08def541..06d08f3e472 100644 --- a/kyuubi-common/src/main/scala/org/apache/kyuubi/service/authentication/LdapAuthenticationProviderImpl.scala +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/service/authentication/LdapAuthenticationProviderImpl.scala @@ -17,17 +17,25 @@ package org.apache.kyuubi.service.authentication -import javax.naming.{Context, NamingException} -import javax.naming.directory.InitialDirContext +import javax.naming.NamingException import javax.security.sasl.AuthenticationException import org.apache.commons.lang3.StringUtils +import org.apache.kyuubi.Logging import org.apache.kyuubi.config.KyuubiConf -import org.apache.kyuubi.config.KyuubiConf._ import org.apache.kyuubi.service.ServiceUtils +import org.apache.kyuubi.service.authentication.LdapAuthenticationProviderImpl.FILTER_FACTORIES +import org.apache.kyuubi.service.authentication.ldap._ -class LdapAuthenticationProviderImpl(conf: KyuubiConf) extends PasswdAuthenticationProvider { +class LdapAuthenticationProviderImpl( + conf: KyuubiConf, + searchFactory: DirSearchFactory = new LdapSearchFactory) + extends PasswdAuthenticationProvider with Logging { + + private val filterOpt: Option[Filter] = FILTER_FACTORIES + .map { f => f.getInstance(conf) } + .collectFirst { case Some(f: Filter) => f } /** * The authenticate method is called by the Kyuubi Server authentication layer @@ -41,47 +49,72 @@ class LdapAuthenticationProviderImpl(conf: KyuubiConf) extends PasswdAuthenticat * @throws AuthenticationException When a user is found to be invalid by the implementation */ override def authenticate(user: String, password: String): Unit = { + + val (usedBind, bindUser, bindPassword) = ( + conf.get(KyuubiConf.AUTHENTICATION_LDAP_BIND_USER), + conf.get(KyuubiConf.AUTHENTICATION_LDAP_BIND_PASSWORD)) match { + case (Some(_bindUser), Some(_bindPw)) => (true, _bindUser, _bindPw) + case _ => + // If no bind user or bind password was specified, + // we assume the user we are authenticating has the ability to search + // the LDAP tree, so we use it as the "binding" account. + // This is the way it worked before bind users were allowed in the LDAP authenticator, + // so we keep existing systems working. + (false, user, password) + } + + var search: DirSearch = null + try { + search = createDirSearch(bindUser, bindPassword) + applyFilter(search, user) + if (usedBind) { + // If we used the bind user, then we need to authenticate again, + // this time using the full user name we got during the bind process. + createDirSearch(search.findUserDn(user), password) + } + } catch { + case e: NamingException => + throw new AuthenticationException( + s"Unable to find the user in the LDAP tree. ${e.getMessage}") + } finally { + ServiceUtils.cleanup(logger, search) + } + } + + @throws[AuthenticationException] + private def createDirSearch(user: String, password: String): DirSearch = { if (StringUtils.isBlank(user)) { throw new AuthenticationException(s"Error validating LDAP user, user is null" + s" or contains blank space") } - if (StringUtils.isBlank(password)) { + if (StringUtils.isBlank(password) || password.getBytes()(0) == 0) { throw new AuthenticationException(s"Error validating LDAP user, password is null" + s" or contains blank space") } - val env = new java.util.Hashtable[String, Any]() - env.put(Context.INITIAL_CONTEXT_FACTORY, "com.sun.jndi.ldap.LdapCtxFactory") - env.put(Context.SECURITY_AUTHENTICATION, "simple") - - conf.get(AUTHENTICATION_LDAP_URL).foreach(env.put(Context.PROVIDER_URL, _)) - - val domain = conf.get(AUTHENTICATION_LDAP_DOMAIN) - val u = - if (!hasDomain(user) && domain.nonEmpty) { - user + "@" + domain.get - } else { - user + val principals = LdapUtils.createCandidatePrincipals(conf, user) + val iterator = principals.iterator + while (iterator.hasNext) { + val principal = iterator.next + try { + return searchFactory.getInstance(conf, principal, password) + } catch { + case ex: AuthenticationException => if (iterator.isEmpty) throw ex } - - val guidKey = conf.get(AUTHENTICATION_LDAP_GUIDKEY) - val bindDn = conf.get(AUTHENTICATION_LDAP_BASEDN) match { - case Some(dn) => guidKey + "=" + u + "," + dn - case _ => u } + throw new AuthenticationException(s"No candidate principals for $user was found.") + } - env.put(Context.SECURITY_PRINCIPAL, bindDn) - env.put(Context.SECURITY_CREDENTIALS, password) - - try { - val ctx = new InitialDirContext(env) - ctx.close() - } catch { - case e: NamingException => - throw new AuthenticationException(s"Error validating LDAP user: $bindDn", e) - } + @throws[AuthenticationException] + private def applyFilter(client: DirSearch, user: String): Unit = filterOpt.foreach { filter => + val username = if (LdapUtils.hasDomain(user)) LdapUtils.extractUserName(user) else user + filter.apply(client, username) } +} - private def hasDomain(userName: String): Boolean = ServiceUtils.indexOfDomainMatch(userName) > 0 +object LdapAuthenticationProviderImpl { + val FILTER_FACTORIES: Array[FilterFactory] = Array[FilterFactory]( + CustomQueryFilterFactory, + new ChainFilterFactory(UserSearchFilterFactory, UserFilterFactory, GroupFilterFactory)) } diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/service/authentication/ldap/ChainFilterFactory.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/service/authentication/ldap/ChainFilterFactory.scala new file mode 100644 index 00000000000..a5badb15d76 --- /dev/null +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/service/authentication/ldap/ChainFilterFactory.scala @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.service.authentication.ldap + +import javax.security.sasl.AuthenticationException + +import org.apache.kyuubi.config.KyuubiConf + +/** + * A factory that produces a [[Filter]] that is implemented as a chain of other filters. + * The chain of filters are created as a result of [[ChainFilterFactory#getInstance]] method call. + * The resulting object filters out all users that don't pass all chained filters. + * The filters will be applied in the order they are mentioned in the factory constructor. + */ + +class ChainFilterFactory(chainedFactories: FilterFactory*) extends FilterFactory { + override def getInstance(conf: KyuubiConf): Option[Filter] = { + val maybeFilters = chainedFactories.map(_.getInstance(conf)) + val filters = maybeFilters.flatten + if (filters.isEmpty) None else Some(new ChainFilter(filters)) + } +} + +class ChainFilter(chainedFilters: Seq[Filter]) extends Filter { + @throws[AuthenticationException] + override def apply(client: DirSearch, user: String): Unit = { + chainedFilters.foreach(_.apply(client, user)) + } +} diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/service/authentication/ldap/CustomQueryFilterFactory.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/service/authentication/ldap/CustomQueryFilterFactory.scala new file mode 100644 index 00000000000..d10e6523b3f --- /dev/null +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/service/authentication/ldap/CustomQueryFilterFactory.scala @@ -0,0 +1,77 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.service.authentication.ldap + +import javax.naming.NamingException +import javax.security.sasl.AuthenticationException + +import org.apache.kyuubi.Logging +import org.apache.kyuubi.config.KyuubiConf + +/** + * A factory for a [[Filter]] based on a custom query. + *
              + * The produced filter object filters out all users that are not found in the search result + * of the query provided in Kyuubi configuration. + * + * @see [[KyuubiConf.AUTHENTICATION_LDAP_CUSTOM_LDAP_QUERY]] + */ +object CustomQueryFilterFactory extends FilterFactory { + override def getInstance(conf: KyuubiConf): Option[Filter] = + conf.get(KyuubiConf.AUTHENTICATION_LDAP_CUSTOM_LDAP_QUERY) + .map { customQuery => new CustomQueryFilter(customQuery) } +} +class CustomQueryFilter(query: String) extends Filter with Logging { + @throws[AuthenticationException] + override def apply(client: DirSearch, user: String): Unit = { + var resultList: Array[String] = null + try { + resultList = client.executeCustomQuery(query) + } catch { + case e: NamingException => + throw new AuthenticationException(s"LDAP Authentication failed for $user", e) + } + if (resultList != null) { + resultList.foreach { matchedDn => + val shortUserName = LdapUtils.getShortName(matchedDn) + info(s"") + if (shortUserName.equalsIgnoreCase(user) || matchedDn.equalsIgnoreCase(user)) { + info("Authentication succeeded based on result set from LDAP query") + return + } + } + // try a generic user search + if (query.contains("%s")) { + val userSearchQuery = query.replace("%s", user) + info("Trying with generic user search in ldap:" + userSearchQuery) + try resultList = client.executeCustomQuery(userSearchQuery) + catch { + case e: NamingException => + throw new AuthenticationException("LDAP Authentication failed for user", e) + } + if (resultList != null && resultList.length == 1) { + info("Authentication succeeded based on result from custom user search query") + return + } + } + } + info("Authentication failed based on result set from custom LDAP query") + throw new AuthenticationException( + "Authentication failed: LDAP query from property returned no data") + } +} diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/service/authentication/ldap/DirSearch.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/service/authentication/ldap/DirSearch.scala new file mode 100644 index 00000000000..c1c4d506038 --- /dev/null +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/service/authentication/ldap/DirSearch.scala @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.service.authentication.ldap + +import java.io.Closeable +import javax.naming.NamingException + +/** + * The object used for executing queries on the Directory Service. + */ +trait DirSearch extends Closeable { + + /** + * Finds user's distinguished name. + * + * @param user username + * @return DN for the specified username + */ + @throws[NamingException] + def findUserDn(user: String): String + + /** + * Finds group's distinguished name. + * + * @param group group name or unique identifier + * @return DN for the specified group name + */ + @throws[NamingException] + def findGroupDn(group: String): String + + /** + * Verifies that specified user is a member of specified group. + * + * @param user user id or distinguished name + * @param groupDn group's DN + * @return true if the user is a member of the group, false - otherwise. + */ + @throws[NamingException] + def isUserMemberOfGroup(user: String, groupDn: String): Boolean + + /** + * Finds groups that contain the specified user. + * + * @param userDn user's distinguished name + * @return list of groups + */ + @throws[NamingException] + def findGroupsForUser(userDn: String): Array[String] + + /** + * Executes an arbitrary query. + * + * @param query any query + * @return list of names in the namespace + */ + @throws[NamingException] + def executeCustomQuery(query: String): Array[String] +} diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/service/authentication/ldap/DirSearchFactory.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/service/authentication/ldap/DirSearchFactory.scala new file mode 100644 index 00000000000..2046632d87d --- /dev/null +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/service/authentication/ldap/DirSearchFactory.scala @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.service.authentication.ldap + +import javax.security.sasl.AuthenticationException + +import org.apache.kyuubi.config.KyuubiConf + +/** + * A factory for [[DirSearch]]. + */ +trait DirSearchFactory { + + /** + * Returns an instance of [[DirSearch]]. + * + * @param conf Kyuubi configuration + * @param user username + * @param password user password + * @return instance of [[DirSearch]] + */ + @throws[AuthenticationException] + def getInstance(conf: KyuubiConf, user: String, password: String): DirSearch +} diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/service/authentication/ldap/Filter.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/service/authentication/ldap/Filter.scala new file mode 100644 index 00000000000..e57eddb0d32 --- /dev/null +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/service/authentication/ldap/Filter.scala @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.service.authentication.ldap + +import javax.security.sasl.AuthenticationException + +/** + * The object that filters LDAP users. + *
              + * The assumption is that this user was already authenticated by a previous bind operation. + */ +trait Filter { + + /** + * Applies this filter to the authenticated user. + * + * @param client LDAP client that will be used for execution of LDAP queries. + * @param user username + */ + @throws[AuthenticationException] + def apply(client: DirSearch, user: String): Unit +} diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/service/authentication/ldap/FilterFactory.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/service/authentication/ldap/FilterFactory.scala new file mode 100644 index 00000000000..d85104684a0 --- /dev/null +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/service/authentication/ldap/FilterFactory.scala @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.service.authentication.ldap + +import org.apache.kyuubi.config.KyuubiConf + +/** + * Factory for the filter. + */ +trait FilterFactory { + + /** + * Returns an instance of the corresponding filter. + * + * @param conf Kyuubi configurations used to configure the filter. + * @return Some(filter) or None if this filter doesn't support provided set of properties + */ + def getInstance(conf: KyuubiConf): Option[Filter] +} diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/service/authentication/ldap/GroupFilterFactory.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/service/authentication/ldap/GroupFilterFactory.scala new file mode 100644 index 00000000000..fd1c907eccd --- /dev/null +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/service/authentication/ldap/GroupFilterFactory.scala @@ -0,0 +1,108 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.service.authentication.ldap + +import javax.naming.NamingException +import javax.security.sasl.AuthenticationException + +import scala.collection.mutable.ArrayBuffer + +import org.apache.kyuubi.Logging +import org.apache.kyuubi.config.KyuubiConf + +object GroupFilterFactory extends FilterFactory { + override def getInstance(conf: KyuubiConf): Option[Filter] = { + val groupFilter = conf.get(KyuubiConf.AUTHENTICATION_LDAP_GROUP_FILTER) + if (groupFilter.isEmpty) { + None + } else if (conf.get(KyuubiConf.AUTHENTICATION_LDAP_USER_MEMBERSHIP_KEY).isDefined) { + Some(new UserMembershipKeyFilter(groupFilter)) + } else { + Some(new GroupMembershipKeyFilter(groupFilter)) + } + } +} + +class GroupMembershipKeyFilter(groupFilter: Seq[String]) extends Filter with Logging { + + @throws[AuthenticationException] + override def apply(ldap: DirSearch, user: String): Unit = { + info(s"Authenticating user '$user' using ${classOf[GroupMembershipKeyFilter].getSimpleName})") + + var memberOf: Array[String] = null + try { + val userDn = ldap.findUserDn(user) + // Workaround for magic things on Mockito: + // unmatched invocation returns an empty list if the method return type is JList, + // but null if the method return type is Array + memberOf = Option(ldap.findGroupsForUser(userDn)).getOrElse(Array.empty) + debug(s"User $userDn member of: ${memberOf.mkString(",")}") + } catch { + case e: NamingException => + throw new AuthenticationException("LDAP Authentication failed for user", e) + } + memberOf.foreach { groupDn => + val shortName = LdapUtils.getShortName(groupDn) + if (groupFilter.exists(shortName.equalsIgnoreCase)) { + debug(s"GroupMembershipKeyFilter passes: user '$user' is a member of '$groupDn' group") + info("Authentication succeeded based on group membership") + return + } + } + info("Authentication failed based on user membership") + throw new AuthenticationException( + "Authentication failed: User not a member of specified list") + } +} + +class UserMembershipKeyFilter(groupFilter: Seq[String]) extends Filter with Logging { + @throws[AuthenticationException] + override def apply(ldap: DirSearch, user: String): Unit = { + info(s"Authenticating user '$user' using $classOf[UserMembershipKeyFilter].getSimpleName") + val groupDns = new ArrayBuffer[String] + groupFilter.foreach { groupId => + try { + val groupDn = ldap.findGroupDn(groupId) + groupDns += groupDn + } catch { + case e: NamingException => + warn("Cannot find DN for group", e) + debug(s"Cannot find DN for group $groupId", e) + } + } + if (groupDns.isEmpty) { + debug(s"No DN(s) has been found for any of group(s): ${groupFilter.mkString(",")}") + throw new AuthenticationException("No DN(s) has been found for any of specified group(s)") + } + groupDns.foreach { groupDn => + try { + if (ldap.isUserMemberOfGroup(user, groupDn)) { + debug(s"UserMembershipKeyFilter passes: user '$user' is a member of '$groupDn' group") + info("Authentication succeeded based on user membership") + return + } + } catch { + case e: NamingException => + warn("Cannot match user and group", e) + debug(s"Cannot match user '$user' and group '$groupDn'", e) + } + } + throw new AuthenticationException( + s"Authentication failed: User '$user' is not a member of listed groups") + } +} diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/service/authentication/ldap/LdapSearch.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/service/authentication/ldap/LdapSearch.scala new file mode 100644 index 00000000000..09dca1d5c3a --- /dev/null +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/service/authentication/ldap/LdapSearch.scala @@ -0,0 +1,126 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.service.authentication.ldap + +import javax.naming.{NamingEnumeration, NamingException} +import javax.naming.directory.{DirContext, SearchResult} + +import scala.collection.mutable.ArrayBuffer + +import org.apache.kyuubi.Logging +import org.apache.kyuubi.config.KyuubiConf + +/** + * Implements search for LDAP. + * @param conf Kyuubi configuration + * @param ctx Directory service that will be used for the queries. + */ +class LdapSearch(conf: KyuubiConf, ctx: DirContext) extends DirSearch with Logging { + + final private val baseDn = conf.get(KyuubiConf.AUTHENTICATION_LDAP_BASE_DN).orNull + final private val groupBases: Array[String] = + LdapUtils.patternsToBaseDns( + LdapUtils.parseDnPatterns(conf, KyuubiConf.AUTHENTICATION_LDAP_GROUP_DN_PATTERN)) + final private val userPatterns: Array[String] = + LdapUtils.parseDnPatterns(conf, KyuubiConf.AUTHENTICATION_LDAP_USER_DN_PATTERN) + final private val userBases: Array[String] = LdapUtils.patternsToBaseDns(userPatterns) + final private val queries: QueryFactory = new QueryFactory(conf) + + /** + * Closes this search object and releases any system resources associated + * with it. If the search object is already closed then invoking this + * method has no effect. + */ + override def close(): Unit = { + try ctx.close() + catch { + case e: NamingException => + warn("Exception when closing LDAP context:", e) + } + } + + @throws[NamingException] + override def findUserDn(user: String): String = { + var allLdapNames: Array[String] = null + if (LdapUtils.isDn(user)) { + val userBaseDn: String = LdapUtils.extractBaseDn(user) + val userRdn: String = LdapUtils.extractFirstRdn(user) + allLdapNames = execute(Array(userBaseDn), queries.findUserDnByRdn(userRdn)).getAllLdapNames + } else { + allLdapNames = findDnByPattern(userPatterns, user) + if (allLdapNames.isEmpty) { + allLdapNames = execute(userBases, queries.findUserDnByName(user)).getAllLdapNames + } + } + if (allLdapNames.length == 1) allLdapNames.head + else { + info(s"Expected exactly one user result for the user: $user, " + + s"but got ${allLdapNames.length}. Returning null") + debug("Matched users: $allLdapNames") + null + } + } + + @throws[NamingException] + private def findDnByPattern(patterns: Seq[String], name: String): Array[String] = { + for (pattern <- patterns) { + val baseDnFromPattern: String = LdapUtils.extractBaseDn(pattern) + val rdn = LdapUtils.extractFirstRdn(pattern).replaceAll("%s", name) + val names = execute(Array(baseDnFromPattern), queries.findDnByPattern(rdn)).getAllLdapNames + if (!names.isEmpty) return names + } + Array.empty + } + + @throws[NamingException] + override def findGroupDn(group: String): String = + execute(groupBases, queries.findGroupDnById(group)).getSingleLdapName + + @throws[NamingException] + override def isUserMemberOfGroup(user: String, groupDn: String): Boolean = { + val userId = LdapUtils.extractUserName(user) + execute(userBases, queries.isUserMemberOfGroup(userId, groupDn)).hasSingleResult + } + + @throws[NamingException] + override def findGroupsForUser(userDn: String): Array[String] = { + val userName = LdapUtils.extractUserName(userDn) + execute(groupBases, queries.findGroupsForUser(userName, userDn)).getAllLdapNames + } + + @throws[NamingException] + override def executeCustomQuery(query: String): Array[String] = + execute(Array(baseDn), queries.customQuery(query)).getAllLdapNamesAndAttributes + + private def execute(baseDns: Array[String], query: Query): SearchResultHandler = { + val searchResults = new ArrayBuffer[NamingEnumeration[SearchResult]] + debug(s"Executing a query: '${query.filter}' with base DNs ${baseDns.mkString(",")}") + baseDns.foreach { baseDn => + try { + val searchResult = ctx.search(baseDn, query.filter, query.controls) + if (searchResult != null) searchResults += searchResult + } catch { + case ex: NamingException => + debug( + s"Exception happened for query '${query.filter}' with base DN '$baseDn'", + ex) + } + } + new SearchResultHandler(searchResults.toArray) + } +} diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/service/authentication/ldap/LdapSearchFactory.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/service/authentication/ldap/LdapSearchFactory.scala new file mode 100644 index 00000000000..e3649d359e7 --- /dev/null +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/service/authentication/ldap/LdapSearchFactory.scala @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.service.authentication.ldap + +import java.util +import javax.naming.{Context, NamingException} +import javax.naming.directory.{DirContext, InitialDirContext} +import javax.security.sasl.AuthenticationException + +import org.apache.kyuubi.Logging +import org.apache.kyuubi.config.KyuubiConf + +class LdapSearchFactory extends DirSearchFactory with Logging { + @throws[AuthenticationException] + override def getInstance(conf: KyuubiConf, principal: String, password: String): DirSearch = { + try { + val ctx = createDirContext(conf, principal, password) + new LdapSearch(conf, ctx) + } catch { + case e: NamingException => + debug(s"Could not connect to the LDAP Server: Authentication failed for $principal") + throw new AuthenticationException(s"Error validating LDAP user: $principal", e) + } + } + + @throws[NamingException] + private def createDirContext( + conf: KyuubiConf, + principal: String, + password: String): DirContext = { + val ldapUrl = conf.get(KyuubiConf.AUTHENTICATION_LDAP_URL) + val env = new util.Hashtable[String, AnyRef] + ldapUrl.foreach(env.put(Context.PROVIDER_URL, _)) + env.put(Context.INITIAL_CONTEXT_FACTORY, "com.sun.jndi.ldap.LdapCtxFactory") + env.put(Context.SECURITY_AUTHENTICATION, "simple") + env.put(Context.SECURITY_PRINCIPAL, principal) + env.put(Context.SECURITY_CREDENTIALS, password) + debug(s"Connecting using principal $principal to ldap server: ${ldapUrl.orNull}") + new InitialDirContext(env) + } +} diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/service/authentication/ldap/LdapUtils.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/service/authentication/ldap/LdapUtils.scala new file mode 100644 index 00000000000..a48f9f48f2b --- /dev/null +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/service/authentication/ldap/LdapUtils.scala @@ -0,0 +1,199 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.service.authentication.ldap + +import scala.collection.mutable.ArrayBuffer + +import org.apache.kyuubi.Logging +import org.apache.kyuubi.config.{KyuubiConf, OptionalConfigEntry} +import org.apache.kyuubi.service.ServiceUtils + +/** + * Static utility methods related to LDAP authentication module. + */ +object LdapUtils extends Logging { + + /** + * Extracts a base DN from the provided distinguished name. + *
              + * Example: + *
              + * "ou=CORP,dc=mycompany,dc=com" is the base DN for "cn=user1,ou=CORP,dc=mycompany,dc=com" + * + * @param dn distinguished name + * @return base DN + */ + def extractBaseDn(dn: String): String = { + val indexOfFirstDelimiter = dn.indexOf(",") + if (indexOfFirstDelimiter > -1) { + return dn.substring(indexOfFirstDelimiter + 1) + } + null + } + + /** + * Extracts the first Relative Distinguished Name (RDN). + *
              + * Example: + *
              + * For DN "cn=user1,ou=CORP,dc=mycompany,dc=com" this method will return "cn=user1" + * + * @param dn distinguished name + * @return first RDN + */ + def extractFirstRdn(dn: String): String = dn.substring(0, dn.indexOf(",")) + + /** + * Extracts username from user DN. + *
              + * Examples: + *
              +   * LdapUtils.extractUserName("UserName")                        = "UserName"
              +   * LdapUtils.extractUserName("UserName@mycorp.com")             = "UserName"
              +   * LdapUtils.extractUserName("cn=UserName,dc=mycompany,dc=com") = "UserName"
              +   * 
              + */ + def extractUserName(userDn: String): String = { + if (!isDn(userDn) && !hasDomain(userDn)) { + return userDn + } + val domainIdx: Int = ServiceUtils.indexOfDomainMatch(userDn) + if (domainIdx > 0) { + return userDn.substring(0, domainIdx) + } + if (userDn.contains("=")) { + return userDn.substring(userDn.indexOf("=") + 1, userDn.indexOf(",")) + } + userDn + } + + /** + * Gets value part of the first attribute in the provided RDN. + *
              + * Example: + *
              + * For RDN "cn=user1,ou=CORP" this method will return "user1" + * + * @param rdn Relative Distinguished Name + * @return value part of the first attribute + */ + def getShortName(rdn: String): String = rdn.split(",")(0).split("=")(1) + + /** + * Check for a domain part in the provided username. + *
              + * Example: + *
              + *
              +   * LdapUtils.hasDomain("user1@mycorp.com") = true
              +   * LdapUtils.hasDomain("user1")            = false
              +   * 
              + * + * @param userName username + * @return true if `userName`` contains `@` part + */ + def hasDomain(userName: String): Boolean = { + ServiceUtils.indexOfDomainMatch(userName) > 0 + } + + /** + * Detects DN names. + *
              + * Example: + *
              + *
              +   * LdapUtils.isDn("cn=UserName,dc=mycompany,dc=com") = true
              +   * LdapUtils.isDn("user1")                           = false
              +   * 
              + * + * @param name name to be checked + * @return true if the provided name is a distinguished name + */ + def isDn(name: String): Boolean = { + name.contains("=") + } + + /** + * Reads and parses DN patterns from Kyuubi configuration. + *
              + * If no patterns are provided in the configuration, then the base DN will be used. + * + * @param conf Kyuubi configuration + * @param confKey configuration key to be read + * @return a list of DN patterns + * @see [[KyuubiConf.AUTHENTICATION_LDAP_BASE_DN]] + * @see [[KyuubiConf.AUTHENTICATION_LDAP_GUID_KEY]] + * @see [[KyuubiConf.AUTHENTICATION_LDAP_GROUP_DN_PATTERN]] + * @see [[KyuubiConf.AUTHENTICATION_LDAP_USER_DN_PATTERN]] + */ + def parseDnPatterns(conf: KyuubiConf, confKey: OptionalConfigEntry[String]): Array[String] = { + val result = new ArrayBuffer[String] + conf.get(confKey).map { patternsString => + patternsString.split(":").foreach { pattern => + if (pattern.contains(",") && pattern.contains("=")) { + result += pattern + } else { + warn(s"Unexpected format for $confKey, ignoring $pattern") + } + } + }.getOrElse { + val guidAttr = conf.get(KyuubiConf.AUTHENTICATION_LDAP_GUID_KEY) + conf.get(KyuubiConf.AUTHENTICATION_LDAP_BASE_DN).foreach { defaultBaseDn => + result += s"$guidAttr=%s,$defaultBaseDn" + } + } + result.toArray + } + + private def patternToBaseDn(pattern: String): String = + if (pattern.contains("=%s")) pattern.split(",", 2)(1) else pattern + + /** + * Converts a collection of Distinguished Name patterns to a collection of base DNs. + * + * @param patterns Distinguished Name patterns + * @return a list of base DNs + * @see [[KyuubiConf.AUTHENTICATION_LDAP_GROUP_DN_PATTERN]] + * @see [[KyuubiConf.AUTHENTICATION_LDAP_USER_DN_PATTERN]] + */ + def patternsToBaseDns(patterns: Array[String]): Array[String] = { + patterns.map(patternToBaseDn) + } + + /** + * Creates a list of principals to be used for user authentication. + * + * @param conf Kyuubi configuration + * @param user username + * @return a list of user's principals + */ + def createCandidatePrincipals(conf: KyuubiConf, user: String): Array[String] = { + if (hasDomain(user) || isDn(user)) { + return Array(user) + } + conf.get(KyuubiConf.AUTHENTICATION_LDAP_DOMAIN).map { ldapDomain => + Array(user + "@" + ldapDomain) + }.getOrElse { + val userPatterns = parseDnPatterns(conf, KyuubiConf.AUTHENTICATION_LDAP_USER_DN_PATTERN) + if (userPatterns.isEmpty) { + return Array(user) + } + userPatterns.map(_.replaceAll("%s", user)) + } + } +} diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/service/authentication/ldap/Query.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/service/authentication/ldap/Query.scala new file mode 100644 index 00000000000..ce9a7d47214 --- /dev/null +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/service/authentication/ldap/Query.scala @@ -0,0 +1,136 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.service.authentication.ldap + +import java.util +import javax.naming.directory.SearchControls + +import org.stringtemplate.v4.ST + +/** + * The object that encompasses all components of a Directory Service search query. + * + * @see [[LdapSearch]] + */ +object Query { + + /** + * Creates Query Builder. + * + * @return query builder. + */ + def builder: Query.QueryBuilder = new Query.QueryBuilder + + /** + * A builder of the [[Query]]. + */ + final class QueryBuilder { + private var filterTemplate: ST = _ + private val controls: SearchControls = { + val _controls = new SearchControls + _controls.setSearchScope(SearchControls.SUBTREE_SCOPE) + _controls.setReturningAttributes(new Array[String](0)) + _controls + } + private val returningAttributes: util.List[String] = new util.ArrayList[String] + + /** + * Sets search filter template. + * + * @param filterTemplate search filter template + * @return the current instance of the builder + */ + def filter(filterTemplate: String): Query.QueryBuilder = { + this.filterTemplate = new ST(filterTemplate) + this + } + + /** + * Sets mapping between names in the search filter template and actual values. + * + * @param key marker in the search filter template. + * @param value actual value + * @return the current instance of the builder + */ + def map(key: String, value: String): Query.QueryBuilder = { + filterTemplate.add(key, value) + this + } + + /** + * Sets mapping between names in the search filter template and actual values. + * + * @param key marker in the search filter template. + * @param values array of values + * @return the current instance of the builder + */ + def map(key: String, values: Array[String]): Query.QueryBuilder = { + filterTemplate.add(key, values) + this + } + + /** + * Sets attribute that should be returned in results for the query. + * + * @param attributeName attribute name + * @return the current instance of the builder + */ + def returnAttribute(attributeName: String): Query.QueryBuilder = { + returningAttributes.add(attributeName) + this + } + + /** + * Sets the maximum number of entries to be returned as a result of the search. + *
              + * 0 indicates no limit: all entries will be returned. + * + * @param limit The maximum number of entries that will be returned. + * @return the current instance of the builder + */ + def limit(limit: Int): Query.QueryBuilder = { + controls.setCountLimit(limit) + this + } + + private def validate(): Unit = { + require(filterTemplate != null, "filter is required for LDAP search query") + } + + private def createFilter: String = filterTemplate.render + + private def updateControls(): Unit = { + if (!returningAttributes.isEmpty) controls.setReturningAttributes( + returningAttributes.toArray(new Array[String](returningAttributes.size))) + } + + /** + * Builds an instance of [[Query]]. + * + * @return configured directory service query + */ + def build: Query = { + validate() + val filter: String = createFilter + updateControls() + new Query(filter, controls) + } + } +} + +case class Query(filter: String, controls: SearchControls) diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/service/authentication/ldap/QueryFactory.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/service/authentication/ldap/QueryFactory.scala new file mode 100644 index 00000000000..849006e3845 --- /dev/null +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/service/authentication/ldap/QueryFactory.scala @@ -0,0 +1,145 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.service.authentication.ldap + +import org.apache.kyuubi.config.KyuubiConf + +/** + * A factory for common types of directory service search queries. + */ +final class QueryFactory(conf: KyuubiConf) { + private val USER_OBJECT_CLASSES = Array("person", "user", "inetOrgPerson") + + private val guidAttr = conf.get(KyuubiConf.AUTHENTICATION_LDAP_GUID_KEY) + private val groupClassAttr = conf.get(KyuubiConf.AUTHENTICATION_LDAP_GROUP_CLASS_KEY) + private val groupMembershipAttr = conf.get(KyuubiConf.AUTHENTICATION_LDAP_GROUP_MEMBERSHIP_KEY) + private val userMembershipAttrOpt = conf.get(KyuubiConf.AUTHENTICATION_LDAP_USER_MEMBERSHIP_KEY) + + /** + * Returns a query for finding Group DN based on group unique ID. + * + * @param groupId group unique identifier + * @return an instance of [[Query]] + */ + def findGroupDnById(groupId: String): Query = Query.builder + .filter("(&(objectClass=)(=))") + .map("guidAttr", guidAttr) + .map("groupClassAttr", groupClassAttr) + .map("groupID", groupId).limit(2) + .build + + /** + * Returns a query for finding user DN based on user RDN. + * + * @param userRdn user RDN + * @return an instance of [[Query]] + */ + def findUserDnByRdn(userRdn: String): Query = Query.builder + .filter("(&(|)}>)())") + .limit(2) + .map("classes", USER_OBJECT_CLASSES) + .map("userRdn", userRdn).build + + /** + * Returns a query for finding user DN based on DN pattern. + *
              + * Name of this method was derived from the original implementation of LDAP authentication. + * This method should be replaced by [[QueryFactory.findUserDnByRdn]]. + * + * @param rdn user RDN + * @return an instance of [[Query]] + */ + def findDnByPattern(rdn: String): Query = Query.builder + .filter("()") + .map("rdn", rdn) + .limit(2) + .build + + /** + * Returns a query for finding user DN based on user unique name. + * + * @param userName user unique name (uid or sAMAccountName) + * @return an instance of [[Query]] + */ + def findUserDnByName(userName: String): Query = Query.builder + .filter("(&(|)}>)" + + "(|(uid=)(sAMAccountName=)))") + .map("classes", USER_OBJECT_CLASSES) + .map("userName", userName) + .limit(2) + .build + + /** + * Returns a query for finding groups to which the user belongs. + * + * @param userName username + * @param userDn user DN + * @return an instance of [[Query]] + */ + def findGroupsForUser(userName: String, userDn: String): Query = Query.builder + .filter("(&(objectClass=)" + + "(|(=)(=)))") + .map("groupClassAttr", groupClassAttr) + .map("groupMembershipAttr", groupMembershipAttr) + .map("userName", userName) + .map("userDn", userDn) + .build + + /** + * Returns a query for checking whether specified user is a member of specified group. + * + * The query requires [[KyuubiConf.AUTHENTICATION_LDAP_USER_MEMBERSHIP_KEY]] + * configuration property to be set. + * + * @param userId user unique identifier + * @param groupDn group DN + * @return an instance of [[Query]] + * @see [[KyuubiConf.AUTHENTICATION_LDAP_USER_MEMBERSHIP_KEY]] + */ + def isUserMemberOfGroup(userId: String, groupDn: String): Query = { + require( + userMembershipAttrOpt.isDefined, + s"${KyuubiConf.AUTHENTICATION_LDAP_USER_MEMBERSHIP_KEY.key} is not configured.") + + Query.builder + .filter("(&(|)}>)" + + "(=)(=))") + .map("classes", USER_OBJECT_CLASSES) + .map("guidAttr", guidAttr) + .map("userMembershipAttr", userMembershipAttrOpt.get) + .map("userId", userId) + .map("groupDn", groupDn) + .limit(2) + .build + } + + /** + * Returns a query object created for the custom filter. + *
              + * This query is configured to return a group membership attribute as part of the search result. + * + * @param searchFilter custom search filter + * @return an instance of [[Query]] + */ + def customQuery(searchFilter: String): Query = { + val builder = Query.builder + builder.filter(searchFilter) + builder.returnAttribute(groupMembershipAttr) + builder.build + } +} diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/service/authentication/ldap/SearchResultHandler.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/service/authentication/ldap/SearchResultHandler.scala new file mode 100644 index 00000000000..52d5b6a906b --- /dev/null +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/service/authentication/ldap/SearchResultHandler.scala @@ -0,0 +1,148 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.service.authentication.ldap + +import javax.naming.{NamingEnumeration, NamingException} +import javax.naming.directory.SearchResult + +import scala.collection.mutable.ArrayBuffer + +import org.apache.kyuubi.Logging + +/** + * The object that handles Directory Service search results. + * In most cases it converts search results into a list of names in the namespace. + */ +object SearchResultHandler { + + /** + * An interface used by [[SearchResultHandler]] for processing records of + * a [[SearchResult]] on a per-record basis. + *
              + * Implementations of this interface perform the actual work of processing each record, + * but don't need to worry about exception handling, closing underlying data structures, + * and combining results from several search requests. + * + * @see SearchResultHandler + */ + trait RecordProcessor extends (SearchResult => Boolean) { + + /** + * Implementations must implement this method to process each record in [[SearchResult]]. + * + * @param record the [[SearchResult]] to precess + * @return true to continue processing, false to stop iterating + * over search results + */ + @throws[NamingException] + override def apply(record: SearchResult): Boolean + } +} + +/** + * Constructs a search result handler object for the provided search results. + * + * @param searchResults directory service search results + */ +class SearchResultHandler(val searchResults: Array[NamingEnumeration[SearchResult]]) + extends Logging { + + /** + * Returns all entries from the search result. + * + * @return a list of names in the namespace + */ + @throws[NamingException] + def getAllLdapNames: Array[String] = { + val result = new ArrayBuffer[String] + handle { record => result += record.getNameInNamespace; true } + result.toArray + } + + /** + * Checks whether search result contains exactly one entry. + * + * @return true if the search result contains a single entry. + */ + @throws[NamingException] + def hasSingleResult: Boolean = { + val allResults = getAllLdapNames + allResults != null && allResults.length == 1 + } + + /** + * Returns a single entry from the search result. + * Throws [[NamingException]] if the search result doesn't contain exactly one entry. + * + * @return name in the namespace + */ + @throws[NamingException] + def getSingleLdapName: String = { + val allLdapNames = getAllLdapNames + if (allLdapNames.length == 1) return allLdapNames.head + throw new NamingException("Single result was expected") + } + + /** + * Returns all entries and all attributes for these entries. + * + * @return a list that includes all entries and all attributes from these entries. + */ + @throws[NamingException] + def getAllLdapNamesAndAttributes: Array[String] = { + val result = new ArrayBuffer[String] + + @throws[NamingException] + def addAllAttributeValuesToResult(values: NamingEnumeration[_]): Unit = { + while (values.hasMore) result += String.valueOf(values.next) + } + handle { record => + result += record.getNameInNamespace + val allAttributes = record.getAttributes.getAll + while (allAttributes.hasMore) { + val attribute = allAttributes.next + addAllAttributeValuesToResult(attribute.getAll) + } + true + } + result.toArray + } + + /** + * Allows for custom processing of the search results. + * + * @param processor [[SearchResultHandler.RecordProcessor]] implementation + */ + @throws[NamingException] + def handle(processor: SearchResultHandler.RecordProcessor): Unit = { + try { + searchResults.foreach { searchResult => + while (searchResult.hasMore) if (!processor.apply(searchResult.next)) return + } + } finally { + searchResults.foreach { searchResult => + try { + searchResult.close() + } catch { + case ex: NamingException => + warn("Failed to close LDAP search result", ex) + } + } + } + } +} diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/service/authentication/ldap/UserFilterFactory.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/service/authentication/ldap/UserFilterFactory.scala new file mode 100644 index 00000000000..7c2f22ed869 --- /dev/null +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/service/authentication/ldap/UserFilterFactory.scala @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.service.authentication.ldap + +import javax.security.sasl.AuthenticationException + +import org.apache.kyuubi.Logging +import org.apache.kyuubi.config.KyuubiConf + +object UserFilterFactory extends FilterFactory with Logging { + override def getInstance(conf: KyuubiConf): Option[Filter] = { + val userFilter = conf.get(KyuubiConf.AUTHENTICATION_LDAP_USER_FILTER) + if (userFilter.isEmpty) None else Some(new UserFilter(userFilter)) + } +} + +class UserFilter(_userFilter: Seq[String]) extends Filter with Logging { + + lazy val userFilter: Seq[String] = _userFilter.map(_.toLowerCase) + + @throws[AuthenticationException] + override def apply(ldap: DirSearch, user: String): Unit = { + info("Authenticating user '$user' using user filter") + val userName = LdapUtils.extractUserName(user).toLowerCase + if (!userFilter.contains(userName)) { + info("Authentication failed based on user membership") + throw new AuthenticationException( + "Authentication failed: User not a member of specified list") + } + } +} diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/service/authentication/ldap/UserSearchFilterFactory.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/service/authentication/ldap/UserSearchFilterFactory.scala new file mode 100644 index 00000000000..9e8bdf3640b --- /dev/null +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/service/authentication/ldap/UserSearchFilterFactory.scala @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.service.authentication.ldap + +import javax.naming.NamingException +import javax.security.sasl.AuthenticationException + +import org.apache.kyuubi.config.KyuubiConf + +/** + * A factory for a [[Filter]] that check whether provided user could be found in the directory. + *
              + * The produced filter object filters out all users that are not found in the directory. + */ +object UserSearchFilterFactory extends FilterFactory { + override def getInstance(conf: KyuubiConf): Option[Filter] = { + val groupFilter = conf.get(KyuubiConf.AUTHENTICATION_LDAP_GROUP_FILTER) + val userFilter = conf.get(KyuubiConf.AUTHENTICATION_LDAP_USER_FILTER) + if (groupFilter.isEmpty && userFilter.isEmpty) None else Some(UserSearchFilter) + } +} + +object UserSearchFilter extends Filter { + @throws[AuthenticationException] + override def apply(client: DirSearch, user: String): Unit = { + try { + val userDn = client.findUserDn(user) + // This should not be null because we were allowed to bind with this username + // safe check in case we were able to bind anonymously. + if (userDn == null) { + throw new AuthenticationException("Authentication failed: User search failed") + } + } catch { + case e: NamingException => + throw new AuthenticationException("LDAP Authentication failed for user", e) + } + } +} diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/session/SessionManager.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/session/SessionManager.scala index 662ac3e58f0..aa46b8d6f76 100644 --- a/kyuubi-common/src/main/scala/org/apache/kyuubi/session/SessionManager.scala +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/session/SessionManager.scala @@ -172,6 +172,11 @@ abstract class SessionManager(name: String) extends CompositeService(name) { execPool.getActiveCount } + def getWorkQueueSize: Int = { + assert(execPool != null) + execPool.getQueue.size() + } + private var _confRestrictList: Set[String] = _ private var _confIgnoreList: Set[String] = _ private var _batchConfIgnoreList: Set[String] = _ @@ -283,9 +288,9 @@ abstract class SessionManager(name: String) extends CompositeService(name) { shutdown = true val shutdownTimeout: Long = if (isServer) { - conf.get(ENGINE_EXEC_POOL_SHUTDOWN_TIMEOUT) - } else { conf.get(SERVER_EXEC_POOL_SHUTDOWN_TIMEOUT) + } else { + conf.get(ENGINE_EXEC_POOL_SHUTDOWN_TIMEOUT) } ThreadUtils.shutdown(timeoutChecker, Duration(shutdownTimeout, TimeUnit.MILLISECONDS)) diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/util/JdbcUtils.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/util/JdbcUtils.scala index df72ee339ba..b89580f4c8d 100644 --- a/kyuubi-common/src/main/scala/org/apache/kyuubi/util/JdbcUtils.scala +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/util/JdbcUtils.scala @@ -104,4 +104,12 @@ object JdbcUtils extends Logging { case _ => "(empty)" } } + + def isDuplicatedKeyDBErr(cause: Throwable): Boolean = { + val duplicatedKeyKeywords = Seq( + "duplicate key value in a unique or primary key constraint or unique index", // Derby + "Duplicate entry" // MySQL + ) + duplicatedKeyKeywords.exists(cause.getMessage.contains) + } } diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/util/RowSetUtils.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/util/RowSetUtils.scala index 82417a73092..f320fd90293 100644 --- a/kyuubi-common/src/main/scala/org/apache/kyuubi/util/RowSetUtils.scala +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/util/RowSetUtils.scala @@ -18,14 +18,11 @@ package org.apache.kyuubi.util import java.nio.ByteBuffer -import java.sql.Timestamp -import java.time.{Duration, Instant, LocalDate, LocalDateTime, Period, ZoneId} +import java.time.{Instant, LocalDate, LocalDateTime, LocalTime, ZoneId} import java.time.chrono.IsoChronology -import java.time.format.DateTimeFormatter import java.time.format.DateTimeFormatterBuilder import java.time.temporal.ChronoField -import java.util.{Date, Locale, TimeZone} -import java.util.concurrent.TimeUnit +import java.util.{Date, Locale} import scala.language.implicitConversions @@ -37,24 +34,24 @@ private[kyuubi] object RowSetUtils { final private val SECOND_PER_HOUR: Long = SECOND_PER_MINUTE * 60L final private val SECOND_PER_DAY: Long = SECOND_PER_HOUR * 24L - private lazy val dateFormatter = { - createDateTimeFormatterBuilder().appendPattern("yyyy-MM-dd") - .toFormatter(Locale.US) - .withChronology(IsoChronology.INSTANCE) - } + private lazy val dateFormatter = createDateTimeFormatterBuilder() + .appendPattern("yyyy-MM-dd") + .toFormatter(Locale.US) + .withChronology(IsoChronology.INSTANCE) private lazy val legacyDateFormatter = FastDateFormat.getInstance("yyyy-MM-dd", Locale.US) - private lazy val timestampFormatter: DateTimeFormatter = { - createDateTimeFormatterBuilder().appendPattern("yyyy-MM-dd HH:mm:ss") - .appendFraction(ChronoField.NANO_OF_SECOND, 0, 9, true) - .toFormatter(Locale.US) - .withChronology(IsoChronology.INSTANCE) - } + private lazy val timeFormatter = createDateTimeFormatterBuilder() + .appendPattern("HH:mm:ss") + .appendFraction(ChronoField.NANO_OF_SECOND, 0, 9, true) + .toFormatter(Locale.US) + .withChronology(IsoChronology.INSTANCE) - private lazy val legacyTimestampFormatter = { - FastDateFormat.getInstance("yyyy-MM-dd HH:mm:ss.SSS", Locale.US) - } + private lazy val timestampFormatter = createDateTimeFormatterBuilder() + .appendPattern("yyyy-MM-dd HH:mm:ss") + .appendFraction(ChronoField.NANO_OF_SECOND, 0, 9, true) + .toFormatter(Locale.US) + .withChronology(IsoChronology.INSTANCE) private def createDateTimeFormatterBuilder(): DateTimeFormatterBuilder = { new DateTimeFormatterBuilder().parseCaseInsensitive() @@ -68,6 +65,10 @@ private[kyuubi] object RowSetUtils { dateFormatter.format(ld) } + def formatLocalTime(lt: LocalTime): String = { + timeFormatter.format(lt) + } + def formatLocalDateTime(ldt: LocalDateTime): String = { timestampFormatter.format(ldt) } @@ -77,40 +78,7 @@ private[kyuubi] object RowSetUtils { .getOrElse(timestampFormatter.format(i)) } - def formatTimestamp(t: Timestamp, timeZone: Option[ZoneId] = None): String = { - timeZone.map(zoneId => { - FastDateFormat.getInstance( - legacyTimestampFormatter.getPattern, - TimeZone.getTimeZone(zoneId), - legacyTimestampFormatter.getLocale) - .format(t) - }).getOrElse(legacyTimestampFormatter.format(t)) - } - implicit def bitSetToBuffer(bitSet: java.util.BitSet): ByteBuffer = { ByteBuffer.wrap(bitSet.toByteArray) } - - def toDayTimeIntervalString(d: Duration): String = { - var rest = d.getSeconds - var sign = "" - if (d.getSeconds < 0) { - sign = "-" - rest = -rest - } - val days = TimeUnit.SECONDS.toDays(rest) - rest %= SECOND_PER_DAY - val hours = TimeUnit.SECONDS.toHours(rest) - rest %= SECOND_PER_HOUR - val minutes = TimeUnit.SECONDS.toMinutes(rest) - val seconds = rest % SECOND_PER_MINUTE - f"$sign$days $hours%02d:$minutes%02d:$seconds%02d.${d.getNano}%09d" - } - - def toYearMonthIntervalString(d: Period): String = { - val years = d.getYears - val months = d.getMonths - val sign = if (years < 0 || months < 0) "-" else "" - s"$sign${Math.abs(years)}-${Math.abs(months)}" - } } diff --git a/kyuubi-common/src/test/resources/ldap/ad.example.com.ldif b/kyuubi-common/src/test/resources/ldap/ad.example.com.ldif new file mode 100644 index 00000000000..68cd01d0f31 --- /dev/null +++ b/kyuubi-common/src/test/resources/ldap/ad.example.com.ldif @@ -0,0 +1,150 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +dn: dc=ad,dc=example,dc=com +dc: ad +objectClass: top +objectClass: domain + +dn: ou=Engineering,dc=ad,dc=example,dc=com +objectClass: top +objectClass: organizationalUnit +ou: Engineering + +dn: ou=Management,dc=ad,dc=example,dc=com +objectClass: top +objectClass: organizationalUnit +ou: Management + +dn: ou=Administration,dc=ad,dc=example,dc=com +objectClass: top +objectClass: organizationalUnit +ou: Administration + +dn: ou=Teams,dc=ad,dc=example,dc=com +objectClass: top +objectClass: organizationalUnit +ou: Teams + +dn: ou=Resources,dc=ad,dc=example,dc=com +objectClass: top +objectClass: organizationalUnit +ou: Resources + +dn: cn=Team 1,ou=Teams,dc=ad,dc=example,dc=com +objectClass: top +objectClass: groupOfNames +objectClass: microsoftSecurityPrincipal +sAMAccountName: team1 +cn: Team 1 +member: sAMAccountName=engineer1,ou=Engineering,dc=ad,dc=example,dc=com +member: sAMAccountName=manager1,ou=Management,dc=ad,dc=example,dc=com + +dn: cn=Team 2,ou=Teams,dc=ad,dc=example,dc=com +objectClass: top +objectClass: groupOfNames +objectClass: microsoftSecurityPrincipal +sAMAccountName: team2 +cn: Team 2 +member: sAMAccountName=engineer2,ou=Engineering,dc=ad,dc=example,dc=com +member: sAMAccountName=manager2,ou=Management,dc=ad,dc=example,dc=com + +dn: cn=Resource 1,ou=Resources,dc=ad,dc=example,dc=com +objectClass: top +objectClass: groupOfNames +objectClass: microsoftSecurityPrincipal +sAMAccountName: resource1 +cn: Resource 1 +member: sAMAccountName=engineer1,ou=Engineering,dc=ad,dc=example,dc=com + +dn: cn=Resource 2,ou=Resources,dc=ad,dc=example,dc=com +objectClass: top +objectClass: groupOfNames +objectClass: microsoftSecurityPrincipal +sAMAccountName: resource2 +cn: Resource 2 +member: sAMAccountName=engineer2,ou=Engineering,dc=ad,dc=example,dc=com + +dn: cn=Admins,ou=Administration,dc=ad,dc=example,dc=com +objectClass: top +objectClass: groupOfUniqueNames +objectClass: microsoftSecurityPrincipal +sAMAccountName: admins +cn: Admins +uniqueMember: sAMAccountName=admin1,ou=Administration,dc=ad,dc=example,dc=com + +dn: sAMAccountName=engineer1,ou=Engineering,dc=ad,dc=example,dc=com +objectClass: top +objectClass: person +objectClass: organizationalPerson +objectClass: inetOrgPerson +objectClass: microsoftSecurityPrincipal +sAMAccountName: engineer1 +cn: Engineer 1 +sn: Surname 1 +userPassword: engineer1-password +memberOf: cn=Team 1,ou=Teams,dc=ad,dc=example,dc=com +memberOf: cn=Resource 1,ou=Resources,dc=ad,dc=example,dc=com + +dn: sAMAccountName=engineer2,ou=Engineering,dc=ad,dc=example,dc=com +objectClass: top +objectClass: person +objectClass: organizationalPerson +objectClass: inetOrgPerson +objectClass: microsoftSecurityPrincipal +sAMAccountName: engineer2 +cn: Engineer 2 +sn: Surname 2 +userPassword: engineer2-password +memberOf: cn=Team 2,ou=Teams,dc=ad,dc=example,dc=com +memberOf: cn=Resource 2,ou=Resources,dc=ad,dc=example,dc=com + +dn: sAMAccountName=manager1,ou=Management,dc=ad,dc=example,dc=com +objectClass: top +objectClass: person +objectClass: organizationalPerson +objectClass: inetOrgPerson +objectClass: microsoftSecurityPrincipal +sAMAccountName: manager1 +cn: Manager 1 +sn: Surname 1 +userPassword: manager1-password +memberOf: cn=Team 1,ou=Teams,dc=ad,dc=example,dc=com + +dn: sAMAccountName=manager2,ou=Management,dc=ad,dc=example,dc=com +objectClass: top +objectClass: person +objectClass: organizationalPerson +objectClass: inetOrgPerson +objectClass: microsoftSecurityPrincipal +sAMAccountName: manager2 +cn: Manager 2 +sn: Surname 2 +userPassword: manager2-password +memberOf: cn=Team 2,ou=Teams,dc=ad,dc=example,dc=com + +dn: sAMAccountName=admin1,ou=Administration,dc=ad,dc=example,dc=com +objectClass: top +objectClass: person +objectClass: organizationalPerson +objectClass: inetOrgPerson +objectClass: microsoftSecurityPrincipal +sAMAccountName: admin1 +cn: Admin 1 +sn: Surname 1 +userPassword: admin1-password +memberOf: cn=Admins,ou=Administration,dc=ad,dc=example,dc=com diff --git a/kyuubi-common/src/test/resources/ldap/example.com.ldif b/kyuubi-common/src/test/resources/ldap/example.com.ldif new file mode 100644 index 00000000000..f19eb2f930c --- /dev/null +++ b/kyuubi-common/src/test/resources/ldap/example.com.ldif @@ -0,0 +1,113 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +dn: ou=People,dc=example,dc=com +objectClass: top +objectClass: organizationalUnit +ou: People +description: Contains entries which describe persons (seamen) + +dn: ou=Groups,dc=example,dc=com +objectClass: top +objectClass: organizationalUnit +ou: Groups +description: Contains entries which describe groups (crews, for instance) + +dn: uid=group1,ou=Groups,dc=example,dc=com +objectClass: top +objectClass: groupOfNames +objectClass: uidObject +uid: group1 +cn: group1 +ou: Groups +member: uid=user1,ou=People,dc=example,dc=com + +dn: uid=group2,ou=Groups,dc=example,dc=com +objectClass: top +objectClass: groupOfNames +objectClass: uidObject +uid: group2 +cn: group2 +ou: Groups +member: uid=user2,ou=People,dc=example,dc=com + +dn: cn=group3,ou=Groups,dc=example,dc=com +objectClass: top +objectClass: groupOfNames +objectClass: uidObject +uid: group3 +cn: group3 +ou: Groups +member: cn=user3,ou=People,dc=example,dc=com + +dn: cn=group4,ou=Groups,dc=example,dc=com +objectClass: top +objectClass: groupOfUniqueNames +objectClass: uidObject +uid: group4 +ou: Groups +cn: group4 +uniqueMember: cn=user4,ou=People,dc=example,dc=com + +dn: uid=user1,ou=People,dc=example,dc=com +objectClass: top +objectClass: person +objectClass: organizationalPerson +objectClass: inetOrgPerson +objectClass: uidObject +givenName: Test1 +cn: Test User1 +sn: user1 +uid: user1 +userPassword: user1 + +dn: uid=user2,ou=People,dc=example,dc=com +objectClass: top +objectClass: person +objectClass: organizationalPerson +objectClass: inetOrgPerson +objectClass: uidObject +givenName: Test2 +cn: Test User2 +sn: user2 +uid: user2 +userPassword: user2 + +dn: cn=user3,ou=People,dc=example,dc=com +objectClass: top +objectClass: person +objectClass: organizationalPerson +objectClass: inetOrgPerson +objectClass: uidObject +givenName: Test3 +cn: Test User3 +sn: user3 +uid: user3 +userPassword: user3 + +dn: cn=user4,ou=People,dc=example,dc=com +objectClass: top +objectClass: person +objectClass: organizationalPerson +objectClass: inetOrgPerson +objectClass: uidObject +givenName: Test4 +cn: Test User4 +sn: user4 +uid: user4 +userPassword: user4 + diff --git a/kyuubi-common/src/test/resources/ldap/microsoft.schema.ldif b/kyuubi-common/src/test/resources/ldap/microsoft.schema.ldif new file mode 100644 index 00000000000..3e3a9a5c1be --- /dev/null +++ b/kyuubi-common/src/test/resources/ldap/microsoft.schema.ldif @@ -0,0 +1,62 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +dn: cn=microsoft, ou=schema +objectclass: metaSchema +objectclass: top +cn: microsoft + +dn: ou=attributetypes, cn=microsoft, ou=schema +objectclass: organizationalUnit +objectclass: top +ou: attributetypes + +dn: m-oid=1.2.840.113556.1.4.221, ou=attributetypes, cn=microsoft, ou=schema +objectclass: metaAttributeType +objectclass: metaTop +objectclass: top +m-oid: 1.2.840.113556.1.4.221 +m-name: sAMAccountName +m-equality: caseIgnoreMatch +m-syntax: 1.3.6.1.4.1.1466.115.121.1.15 +m-singleValue: TRUE + +dn: m-oid=1.2.840.113556.1.4.222, ou=attributetypes, cn=microsoft, ou=schema +objectclass: metaAttributeType +objectclass: metaTop +objectclass: top +m-oid: 1.2.840.113556.1.4.222 +m-name: memberOf +m-equality: caseIgnoreMatch +m-syntax: 1.3.6.1.4.1.1466.115.121.1.15 +m-singleValue: FALSE + +dn: ou=objectClasses, cn=microsoft, ou=schema +objectclass: organizationalUnit +objectclass: top +ou: objectClasses + +dn: m-oid=1.2.840.113556.1.5.6, ou=objectClasses, cn=microsoft, ou=schema +objectclass: metaObjectClass +objectclass: metaTop +objectclass: top +m-oid: 1.2.840.113556.1.5.6 +m-name: microsoftSecurityPrincipal +m-supObjectClass: top +m-typeObjectClass: AUXILIARY +m-must: sAMAccountName +m-may: memberOf diff --git a/kyuubi-common/src/test/scala/org/apache/kyuubi/MarkdownUtils.scala b/kyuubi-common/src/test/scala/org/apache/kyuubi/MarkdownUtils.scala new file mode 100644 index 00000000000..25a768b75be --- /dev/null +++ b/kyuubi-common/src/test/scala/org/apache/kyuubi/MarkdownUtils.scala @@ -0,0 +1,213 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi + +import java.nio.charset.StandardCharsets +import java.nio.file.{Files, Path, StandardOpenOption} + +import scala.collection.JavaConverters._ +import scala.collection.mutable.ArrayBuffer +import scala.compat.Platform.EOL + +import com.vladsch.flexmark.formatter.Formatter +import com.vladsch.flexmark.parser.{Parser, ParserEmulationProfile, PegdownExtensions} +import com.vladsch.flexmark.profile.pegdown.PegdownOptionsAdapter +import com.vladsch.flexmark.util.data.{MutableDataHolder, MutableDataSet} +import org.scalatest.Assertions.{assertResult, withClue} + +object MarkdownUtils { + + def verifyOutput( + markdown: Path, + newOutput: MarkdownBuilder, + agent: String, + module: String): Unit = { + val formatted = newOutput.formatMarkdown() + if (System.getenv("KYUUBI_UPDATE") == "1") { + Files.write( + markdown, + formatted.asJava, + StandardOpenOption.CREATE, + StandardOpenOption.TRUNCATE_EXISTING) + } else { + val linesInFile = Files.readAllLines(markdown, StandardCharsets.UTF_8) + linesInFile.asScala.zipWithIndex.zip(formatted).foreach { case ((str1, index), str2) => + withClue(s"$markdown out of date, as line ${index + 1} is not expected." + + " Please update doc with KYUUBI_UPDATE=1 build/mvn clean test" + + s" -pl $module -am -Pflink-provided,spark-provided,hive-provided" + + s" -Dtest=none -DwildcardSuites=$agent ") { + assertResult(str2)(str1) + } + } + } + } + + def line(str: String): String = { + str.stripMargin.replaceAll(EOL, "") + } + + def appendBlankLine(buffer: ArrayBuffer[String]): Unit = buffer += "" + + def appendFileContent(buffer: ArrayBuffer[String], path: Path): Unit = { + buffer += "```bash" + buffer ++= Files.readAllLines(path).asScala + buffer += "```" + } +} + +class MarkdownBuilder { + private val buffer = new ArrayBuffer[String]() + + /** + * append a single line + * with replacing EOL to empty string + * @param str single line + * @return + */ + def line(str: String = ""): MarkdownBuilder = { + buffer += str.stripMargin.replaceAll(EOL, "") + this + } + + /** + * append the multiline + * with splitting EOL into single lines + * @param multiline multiline with default line margin "|" + * @return + */ + def lines(multiline: String): MarkdownBuilder = { + buffer ++= multiline.stripMargin.split(EOL) + this + } + + /** + * append the licence + * @return + */ + def licence(): MarkdownBuilder = { + lines(""" + | + |""") + } + + /** + * append the auto-generation hint + * @param className the full class name of agent suite + * @return + */ + def generationHint(className: String): MarkdownBuilder = { + lines(s""" + | + | + |""") + } + + /** + * append file content + * @param path file path + * @return + */ + def file(path: Path): MarkdownBuilder = { + buffer ++= Files.readAllLines(path).asScala + this + } + + /** + * append file content with code block quote + * @param path path to file + * @param language language of codeblock + * @return + */ + def fileWithBlock(path: Path, language: String = "bash"): MarkdownBuilder = { + buffer += s"```$language" + file(path) + buffer += "```" + this + } + + def formatMarkdown(): Stream[String] = { + def createParserOptions(emulationProfile: ParserEmulationProfile): MutableDataHolder = { + PegdownOptionsAdapter.flexmarkOptions(PegdownExtensions.ALL).toMutable + .set(Parser.PARSER_EMULATION_PROFILE, emulationProfile) + } + + def createFormatterOptions( + parserOptions: MutableDataHolder, + emulationProfile: ParserEmulationProfile): MutableDataSet = { + new MutableDataSet() + .set(Parser.EXTENSIONS, Parser.EXTENSIONS.get(parserOptions)) + .set(Formatter.FORMATTER_EMULATION_PROFILE, emulationProfile) + } + + val emulationProfile = ParserEmulationProfile.COMMONMARK + val parserOptions = createParserOptions(emulationProfile) + val formatterOptions = createFormatterOptions(parserOptions, emulationProfile) + val parser = Parser.builder(parserOptions).build + val renderer = Formatter.builder(formatterOptions).build + val document = parser.parse(buffer.mkString(EOL)) + val formattedLines = new ArrayBuffer[String](buffer.length) + val formattedLinesAppendable = new Appendable { + override def append(csq: CharSequence): Appendable = { + if (csq.length() > 0) { + formattedLines.append(csq.toString) + } + this + } + + override def append(csq: CharSequence, start: Int, end: Int): Appendable = { + append(csq.toString.substring(start, end)) + } + + override def append(c: Char): Appendable = { + append(c.toString) + } + } + renderer.render(document, formattedLinesAppendable) + // trim the ending EOL appended by renderer for each line + formattedLines.toStream.map(str => + if (str.endsWith(EOL)) { + str.substring(0, str.length - 1) + } else { + str + }) + } +} + +object MarkdownBuilder { + def apply(licenced: Boolean = true, className: String = null): MarkdownBuilder = { + val builder = new MarkdownBuilder + if (licenced) { builder.licence() } + if (className != null) { builder.generationHint(className) } + builder + } +} diff --git a/kyuubi-common/src/test/scala/org/apache/kyuubi/TestUtils.scala b/kyuubi-common/src/test/scala/org/apache/kyuubi/TestUtils.scala index 0c1f9dee116..97675768aec 100644 --- a/kyuubi-common/src/test/scala/org/apache/kyuubi/TestUtils.scala +++ b/kyuubi-common/src/test/scala/org/apache/kyuubi/TestUtils.scala @@ -14,54 +14,15 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.kyuubi -import java.nio.charset.StandardCharsets -import java.nio.file.{Files, Path, StandardOpenOption} import java.sql.ResultSet import scala.collection.mutable.ArrayBuffer import com.jakewharton.fliptables.FlipTable -import org.scalatest.Assertions.convertToEqualizer object TestUtils { - - def verifyOutput(markdown: Path, newOutput: ArrayBuffer[String], agent: String): Unit = { - if (System.getenv("KYUUBI_UPDATE") == "1") { - val writer = Files.newBufferedWriter( - markdown, - StandardCharsets.UTF_8, - StandardOpenOption.TRUNCATE_EXISTING, - StandardOpenOption.CREATE) - try { - newOutput.foreach { line => - writer.write(line) - writer.newLine() - } - } finally { - writer.close() - } - } else { - val expected = new ArrayBuffer[String]() - - val reader = Files.newBufferedReader(markdown, StandardCharsets.UTF_8) - var line = reader.readLine() - while (line != null) { - expected += line - line = reader.readLine() - } - reader.close() - val hint = s"$markdown out of date, please update doc with " + - s"KYUUBI_UPDATE=1 build/mvn clean install -Pflink-provided,spark-provided,hive-provided " + - s"-DwildcardSuites=$agent" - assert(newOutput.size === expected.size, hint) - - newOutput.zip(expected).foreach { case (out, in) => assert(out === in, hint) } - } - } - def displayResultSet(resultSet: ResultSet): Unit = { if (resultSet == null) throw new NullPointerException("resultSet == null") val resultSetMetaData = resultSet.getMetaData diff --git a/kyuubi-common/src/test/scala/org/apache/kyuubi/UtilsSuite.scala b/kyuubi-common/src/test/scala/org/apache/kyuubi/UtilsSuite.scala index f75f299aed3..5973fc6e7a6 100644 --- a/kyuubi-common/src/test/scala/org/apache/kyuubi/UtilsSuite.scala +++ b/kyuubi-common/src/test/scala/org/apache/kyuubi/UtilsSuite.scala @@ -49,6 +49,7 @@ class UtilsSuite extends KyuubiFunSuite { assert(props.getProperty("kyuubi_trino_version") === TRINO_COMPILE_VERSION) assert(props.getProperty("branch") === BRANCH) assert(props.getProperty("revision") === REVISION) + assert(props.getProperty("revision_time") === REVISION_TIME) assert(props.getProperty("user") === BUILD_USER) assert(props.getProperty("url") === REPO_URL) assert(props.getProperty("date") === BUILD_DATE) diff --git a/kyuubi-common/src/test/scala/org/apache/kyuubi/operation/HiveMetadataTests.scala b/kyuubi-common/src/test/scala/org/apache/kyuubi/operation/HiveMetadataTests.scala index fe1f5f47b38..aad31d5b8d4 100644 --- a/kyuubi-common/src/test/scala/org/apache/kyuubi/operation/HiveMetadataTests.scala +++ b/kyuubi-common/src/test/scala/org/apache/kyuubi/operation/HiveMetadataTests.scala @@ -17,7 +17,11 @@ package org.apache.kyuubi.operation -import org.apache.kyuubi.Utils +import java.sql.{DatabaseMetaData, ResultSet, SQLException, SQLFeatureNotSupportedException} + +import scala.util.Random + +import org.apache.kyuubi.{KYUUBI_VERSION, KyuubiSQLException, Utils} import org.apache.kyuubi.operation.meta.ResultSetSchemaConstant._ // For `hive` external catalog only @@ -98,4 +102,186 @@ trait HiveMetadataTests extends SparkMetadataTests { statement.execute(s"DROP VIEW IF EXISTS ${schemas(3)}.$view_global_test") } } + + test("audit Kyuubi Hive JDBC connection common MetaData") { + withJdbcStatement() { statement => + val metaData = statement.getConnection.getMetaData + Seq( + () => metaData.allProceduresAreCallable(), + () => metaData.getURL, + () => metaData.getUserName, + () => metaData.isReadOnly, + () => metaData.nullsAreSortedHigh, + () => metaData.nullsAreSortedLow, + () => metaData.nullsAreSortedAtStart(), + () => metaData.nullsAreSortedAtEnd(), + () => metaData.usesLocalFiles(), + () => metaData.usesLocalFilePerTable(), + () => metaData.supportsMixedCaseIdentifiers(), + () => metaData.supportsMixedCaseQuotedIdentifiers(), + () => metaData.storesUpperCaseIdentifiers(), + () => metaData.storesUpperCaseQuotedIdentifiers(), + () => metaData.storesLowerCaseIdentifiers(), + () => metaData.storesLowerCaseQuotedIdentifiers(), + () => metaData.storesMixedCaseIdentifiers(), + () => metaData.storesMixedCaseQuotedIdentifiers(), + () => metaData.nullPlusNonNullIsNull, + () => metaData.supportsConvert, + () => metaData.supportsTableCorrelationNames, + () => metaData.supportsDifferentTableCorrelationNames, + () => metaData.supportsExpressionsInOrderBy(), + () => metaData.supportsOrderByUnrelated, + () => metaData.supportsGroupByUnrelated, + () => metaData.supportsGroupByBeyondSelect, + () => metaData.supportsLikeEscapeClause, + () => metaData.supportsMultipleTransactions, + () => metaData.supportsMinimumSQLGrammar, + () => metaData.supportsCoreSQLGrammar, + () => metaData.supportsExtendedSQLGrammar, + () => metaData.supportsANSI92EntryLevelSQL, + () => metaData.supportsANSI92IntermediateSQL, + () => metaData.supportsANSI92FullSQL, + () => metaData.supportsIntegrityEnhancementFacility, + () => metaData.isCatalogAtStart, + () => metaData.supportsSubqueriesInComparisons, + () => metaData.supportsSubqueriesInExists, + () => metaData.supportsSubqueriesInIns, + () => metaData.supportsSubqueriesInQuantifieds, + // Spark support this, see https://issues.apache.org/jira/browse/SPARK-18455 + () => metaData.supportsCorrelatedSubqueries, + () => metaData.supportsOpenCursorsAcrossCommit, + () => metaData.supportsOpenCursorsAcrossRollback, + () => metaData.supportsOpenStatementsAcrossCommit, + () => metaData.supportsOpenStatementsAcrossRollback, + () => metaData.getMaxBinaryLiteralLength, + () => metaData.getMaxCharLiteralLength, + () => metaData.getMaxColumnsInGroupBy, + () => metaData.getMaxColumnsInIndex, + () => metaData.getMaxColumnsInOrderBy, + () => metaData.getMaxColumnsInSelect, + () => metaData.getMaxColumnsInTable, + () => metaData.getMaxConnections, + () => metaData.getMaxCursorNameLength, + () => metaData.getMaxIndexLength, + () => metaData.getMaxSchemaNameLength, + () => metaData.getMaxProcedureNameLength, + () => metaData.getMaxCatalogNameLength, + () => metaData.getMaxRowSize, + () => metaData.doesMaxRowSizeIncludeBlobs, + () => metaData.getMaxStatementLength, + () => metaData.getMaxStatements, + () => metaData.getMaxTableNameLength, + () => metaData.getMaxTablesInSelect, + () => metaData.getMaxUserNameLength, + () => metaData.supportsTransactionIsolationLevel(1), + () => metaData.supportsDataDefinitionAndDataManipulationTransactions, + () => metaData.supportsDataManipulationTransactionsOnly, + () => metaData.dataDefinitionCausesTransactionCommit, + () => metaData.dataDefinitionIgnoredInTransactions, + () => metaData.getColumnPrivileges("", "%", "%", "%"), + () => metaData.getTablePrivileges("", "%", "%"), + () => metaData.getBestRowIdentifier("", "%", "%", 0, true), + () => metaData.getVersionColumns("", "%", "%"), + () => metaData.getExportedKeys("", "default", ""), + () => metaData.supportsResultSetConcurrency(ResultSet.TYPE_FORWARD_ONLY, 2), + () => metaData.ownUpdatesAreVisible(ResultSet.TYPE_FORWARD_ONLY), + () => metaData.ownDeletesAreVisible(ResultSet.TYPE_FORWARD_ONLY), + () => metaData.ownInsertsAreVisible(ResultSet.TYPE_FORWARD_ONLY), + () => metaData.othersUpdatesAreVisible(ResultSet.TYPE_FORWARD_ONLY), + () => metaData.othersDeletesAreVisible(ResultSet.TYPE_FORWARD_ONLY), + () => metaData.othersInsertsAreVisible(ResultSet.TYPE_FORWARD_ONLY), + () => metaData.updatesAreDetected(ResultSet.TYPE_FORWARD_ONLY), + () => metaData.deletesAreDetected(ResultSet.TYPE_FORWARD_ONLY), + () => metaData.insertsAreDetected(ResultSet.TYPE_FORWARD_ONLY), + () => metaData.supportsNamedParameters(), + () => metaData.supportsMultipleOpenResults, + () => metaData.supportsGetGeneratedKeys, + () => metaData.getSuperTypes("", "%", "%"), + () => metaData.getSuperTables("", "%", "%"), + () => metaData.getAttributes("", "%", "%", "%"), + () => metaData.getResultSetHoldability, + () => metaData.locatorsUpdateCopy, + () => metaData.supportsStatementPooling, + () => metaData.getRowIdLifetime, + () => metaData.supportsStoredFunctionsUsingCallSyntax, + () => metaData.autoCommitFailureClosesAllResultSets, + () => metaData.getFunctionColumns("", "%", "%", "%"), + () => metaData.getPseudoColumns("", "%", "%", "%"), + () => metaData.generatedKeyAlwaysReturned).foreach { func => + val e = intercept[SQLFeatureNotSupportedException](func()) + assert(e.getMessage === "Method not supported") + } + + assert(metaData.allTablesAreSelectable) + assert(metaData.getClientInfoProperties.next) + assert(metaData.getDriverName === "Kyuubi Project Hive JDBC Client" || + metaData.getDriverName === "Kyuubi Project Hive JDBC Shaded Client") + assert(metaData.getDriverVersion === KYUUBI_VERSION) + assert( + metaData.getIdentifierQuoteString === " ", + "This method returns a space \" \" if identifier quoting is not supported") + assert(metaData.getNumericFunctions === "") + assert(metaData.getStringFunctions === "") + assert(metaData.getSystemFunctions === "") + assert(metaData.getTimeDateFunctions === "") + assert(metaData.getSearchStringEscape === "\\") + assert(metaData.getExtraNameCharacters === "") + assert(metaData.supportsAlterTableWithAddColumn()) + assert(!metaData.supportsAlterTableWithDropColumn()) + assert(metaData.supportsColumnAliasing()) + assert(metaData.supportsGroupBy) + assert(!metaData.supportsMultipleResultSets) + assert(!metaData.supportsNonNullableColumns) + assert(metaData.supportsOuterJoins) + assert(metaData.supportsFullOuterJoins) + assert(metaData.supportsLimitedOuterJoins) + assert(metaData.getSchemaTerm === "database") + assert(metaData.getProcedureTerm === "UDF") + assert(metaData.getCatalogTerm === "catalog") + assert(metaData.getCatalogSeparator === ".") + assert(metaData.supportsSchemasInDataManipulation) + assert(!metaData.supportsSchemasInProcedureCalls) + assert(metaData.supportsSchemasInTableDefinitions) + assert(!metaData.supportsSchemasInIndexDefinitions) + assert(!metaData.supportsSchemasInPrivilegeDefinitions) + assert(metaData.supportsCatalogsInDataManipulation) + assert(metaData.supportsCatalogsInProcedureCalls) + assert(metaData.supportsCatalogsInTableDefinitions) + assert(metaData.supportsCatalogsInIndexDefinitions) + assert(metaData.supportsCatalogsInPrivilegeDefinitions) + assert(!metaData.supportsPositionedDelete) + assert(!metaData.supportsPositionedUpdate) + assert(!metaData.supportsSelectForUpdate) + assert(!metaData.supportsStoredProcedures) + // This is actually supported, but hive jdbc package return false + assert(!metaData.supportsUnion) + assert(metaData.supportsUnionAll) + assert(metaData.getMaxColumnNameLength === 128) + assert(metaData.getDefaultTransactionIsolation === java.sql.Connection.TRANSACTION_NONE) + assert(!metaData.supportsTransactions) + assert(!metaData.getProcedureColumns("", "%", "%", "%").next()) + val e1 = intercept[SQLException] { + metaData.getPrimaryKeys("", "default", "src").next() + } + assert(e1.getMessage.contains(KyuubiSQLException.featureNotSupported().getMessage)) + assert(!metaData.getImportedKeys("", "default", "").next()) + + val e2 = intercept[SQLException] { + metaData.getCrossReference("", "default", "src", "", "default", "src2").next() + } + assert(e2.getMessage.contains(KyuubiSQLException.featureNotSupported().getMessage)) + assert(!metaData.getIndexInfo("", "default", "src", true, true).next()) + + assert(metaData.supportsResultSetType(new Random().nextInt())) + assert(!metaData.supportsBatchUpdates) + assert(!metaData.getUDTs(",", "%", "%", null).next()) + assert(!metaData.supportsSavepoints) + assert(!metaData.supportsResultSetHoldability(ResultSet.HOLD_CURSORS_OVER_COMMIT)) + assert(metaData.getJDBCMajorVersion === 3) + assert(metaData.getJDBCMinorVersion === 0) + assert(metaData.getSQLStateType === DatabaseMetaData.sqlStateSQL) + assert(metaData.getMaxLogicalLobSize === 0) + assert(!metaData.supportsRefCursors) + } + } } diff --git a/kyuubi-common/src/test/scala/org/apache/kyuubi/operation/IcebergMetadataTests.scala b/kyuubi-common/src/test/scala/org/apache/kyuubi/operation/IcebergMetadataTests.scala index d14224a842f..e3bb4ccb730 100644 --- a/kyuubi-common/src/test/scala/org/apache/kyuubi/operation/IcebergMetadataTests.scala +++ b/kyuubi-common/src/test/scala/org/apache/kyuubi/operation/IcebergMetadataTests.scala @@ -17,11 +17,11 @@ package org.apache.kyuubi.operation -import org.apache.kyuubi.IcebergSuiteMixin +import org.apache.kyuubi.{IcebergSuiteMixin, SPARK_COMPILE_VERSION} import org.apache.kyuubi.operation.meta.ResultSetSchemaConstant._ -import org.apache.kyuubi.util.SparkVersionUtil.isSparkVersionAtLeast +import org.apache.kyuubi.util.SparkVersionUtil -trait IcebergMetadataTests extends HiveJDBCTestHelper with IcebergSuiteMixin { +trait IcebergMetadataTests extends HiveJDBCTestHelper with IcebergSuiteMixin with SparkVersionUtil { test("get catalogs") { withJdbcStatement() { statement => @@ -153,11 +153,11 @@ trait IcebergMetadataTests extends HiveJDBCTestHelper with IcebergSuiteMixin { "date", "timestamp", // SPARK-37931 - if (isSparkVersionAtLeast("3.3")) "struct" + if (SPARK_ENGINE_RUNTIME_VERSION >= "3.3") "struct" else "struct<`X`: bigint, `Y`: double>", "binary", // SPARK-37931 - if (isSparkVersionAtLeast("3.3")) "struct" else "struct<`X`: string>") + if (SPARK_COMPILE_VERSION >= "3.3") "struct" else "struct<`X`: string>") val cols = dataTypes.zipWithIndex.map { case (dt, idx) => s"c$idx" -> dt } val (colNames, _) = cols.unzip diff --git a/kyuubi-common/src/test/scala/org/apache/kyuubi/operation/JDBCTestHelper.scala b/kyuubi-common/src/test/scala/org/apache/kyuubi/operation/JDBCTestHelper.scala index 663fd181644..97330837dc0 100644 --- a/kyuubi-common/src/test/scala/org/apache/kyuubi/operation/JDBCTestHelper.scala +++ b/kyuubi-common/src/test/scala/org/apache/kyuubi/operation/JDBCTestHelper.scala @@ -17,7 +17,7 @@ package org.apache.kyuubi.operation -import java.sql.{DriverManager, SQLException, Statement} +import java.sql.{DriverManager, PreparedStatement, SQLException, Statement} import java.util.Locale import org.apache.kyuubi.KyuubiFunSuite @@ -75,6 +75,31 @@ trait JDBCTestHelper extends KyuubiFunSuite { } } + def withMultipleConnectionJdbcPrepareStatement( + sql: String, + tableNames: String*)(fs: (PreparedStatement => Unit)*): Unit = { + val connections = fs.map { _ => DriverManager.getConnection(jdbcUrlWithConf, user, password) } + val statements = connections.map(_.prepareStatement(sql)) + + try { + statements.zip(fs).foreach { case (s, f) => f(s) } + } finally { + tableNames.foreach { name => + if (name.toUpperCase(Locale.ROOT).startsWith("VIEW")) { + statements.head.execute(s"DROP VIEW IF EXISTS $name") + } else { + statements.head.execute(s"DROP TABLE IF EXISTS $name") + } + } + info("Closing statements") + statements.foreach(_.close()) + info("Closed statements") + info("Closing connections") + connections.foreach(_.close()) + info("Closed connections") + } + } + def withDatabases(dbNames: String*)(fs: (Statement => Unit)*): Unit = { val connections = fs.map { _ => DriverManager.getConnection(jdbcUrlWithConf, user, password) } val statements = connections.map(_.createStatement()) @@ -97,4 +122,10 @@ trait JDBCTestHelper extends KyuubiFunSuite { def withJdbcStatement(tableNames: String*)(f: Statement => Unit): Unit = { withMultipleConnectionJdbcStatement(tableNames: _*)(f) } + + def withJdbcPrepareStatement( + sql: String, + tableNames: String*)(f: PreparedStatement => Unit): Unit = { + withMultipleConnectionJdbcPrepareStatement(sql, tableNames: _*)(f) + } } diff --git a/kyuubi-common/src/test/scala/org/apache/kyuubi/operation/SparkDataTypeTests.scala b/kyuubi-common/src/test/scala/org/apache/kyuubi/operation/SparkDataTypeTests.scala index 6881677034e..f0dd3e72374 100644 --- a/kyuubi-common/src/test/scala/org/apache/kyuubi/operation/SparkDataTypeTests.scala +++ b/kyuubi-common/src/test/scala/org/apache/kyuubi/operation/SparkDataTypeTests.scala @@ -19,15 +19,16 @@ package org.apache.kyuubi.operation import java.sql.{Date, Timestamp} -import org.apache.kyuubi.engine.SemanticVersion +import org.apache.kyuubi.util.SparkVersionUtil -trait SparkDataTypeTests extends HiveJDBCTestHelper { - protected lazy val SPARK_ENGINE_VERSION = sparkEngineMajorMinorVersion +trait SparkDataTypeTests extends HiveJDBCTestHelper with SparkVersionUtil { def resultFormat: String = "thrift" test("execute statement - select null") { - assume(resultFormat == "thrift" || (resultFormat == "arrow" && SPARK_ENGINE_VERSION >= "3.2")) + assume( + resultFormat == "thrift" || + (resultFormat == "arrow" && SPARK_ENGINE_RUNTIME_VERSION >= "3.2")) withJdbcStatement() { statement => val resultSet = statement.executeQuery("SELECT NULL AS col") assert(resultSet.next()) @@ -159,9 +160,10 @@ trait SparkDataTypeTests extends HiveJDBCTestHelper { } } - test("execute statement - select timestamp") { + test("execute statement - select timestamp - second") { withJdbcStatement() { statement => - val resultSet = statement.executeQuery("SELECT TIMESTAMP '2018-11-17 13:33:33' AS col") + val resultSet = statement.executeQuery( + "SELECT TIMESTAMP '2018-11-17 13:33:33' AS col") assert(resultSet.next()) assert(resultSet.getTimestamp("col") === Timestamp.valueOf("2018-11-17 13:33:33")) val metaData = resultSet.getMetaData @@ -171,13 +173,39 @@ trait SparkDataTypeTests extends HiveJDBCTestHelper { } } + test("execute statement - select timestamp - millisecond") { + withJdbcStatement() { statement => + val resultSet = statement.executeQuery( + "SELECT TIMESTAMP '2018-11-17 13:33:33.12345' AS col") + assert(resultSet.next()) + assert(resultSet.getTimestamp("col") === Timestamp.valueOf("2018-11-17 13:33:33.12345")) + val metaData = resultSet.getMetaData + assert(metaData.getColumnType(1) === java.sql.Types.TIMESTAMP) + assert(metaData.getPrecision(1) === 29) + assert(metaData.getScale(1) === 9) + } + } + + test("execute statement - select timestamp - overflow") { + withJdbcStatement() { statement => + val resultSet = statement.executeQuery( + "SELECT TIMESTAMP '2018-11-17 13:33:33.1234567' AS col") + assert(resultSet.next()) + assert(resultSet.getTimestamp("col") === Timestamp.valueOf("2018-11-17 13:33:33.123456")) + val metaData = resultSet.getMetaData + assert(metaData.getColumnType(1) === java.sql.Types.TIMESTAMP) + assert(metaData.getPrecision(1) === 29) + assert(metaData.getScale(1) === 9) + } + } + test("execute statement - select timestamp_ntz") { - assume(SPARK_ENGINE_VERSION >= "3.4") + assume(SPARK_ENGINE_RUNTIME_VERSION >= "3.4") withJdbcStatement() { statement => val resultSet = statement.executeQuery( - "SELECT make_timestamp_ntz(2022, 03, 24, 18, 08, 31.800) AS col") + "SELECT make_timestamp_ntz(2022, 03, 24, 18, 08, 31.8888) AS col") assert(resultSet.next()) - assert(resultSet.getTimestamp("col") === Timestamp.valueOf("2022-03-24 18:08:31.800")) + assert(resultSet.getTimestamp("col") === Timestamp.valueOf("2022-03-24 18:08:31.8888")) val metaData = resultSet.getMetaData assert(metaData.getColumnType(1) === java.sql.Types.TIMESTAMP) assert(metaData.getPrecision(1) === 29) @@ -186,7 +214,9 @@ trait SparkDataTypeTests extends HiveJDBCTestHelper { } test("execute statement - select daytime interval") { - assume(resultFormat == "thrift" || (resultFormat == "arrow" && SPARK_ENGINE_VERSION >= "3.3")) + assume( + resultFormat == "thrift" || + (resultFormat == "arrow" && SPARK_ENGINE_RUNTIME_VERSION >= "3.3")) withJdbcStatement() { statement => Map( "interval 1 day 1 hour -60 minutes 30 seconds" -> @@ -215,7 +245,7 @@ trait SparkDataTypeTests extends HiveJDBCTestHelper { assert(resultSet.next()) val result = resultSet.getString("col") val metaData = resultSet.getMetaData - if (SPARK_ENGINE_VERSION < "3.2") { + if (SPARK_ENGINE_RUNTIME_VERSION < "3.2") { // for spark 3.1 and backwards assert(result === kv._2._2) assert(metaData.getPrecision(1) === Int.MaxValue) @@ -231,7 +261,9 @@ trait SparkDataTypeTests extends HiveJDBCTestHelper { } test("execute statement - select year/month interval") { - assume(resultFormat == "thrift" || (resultFormat == "arrow" && SPARK_ENGINE_VERSION >= "3.3")) + assume( + resultFormat == "thrift" || + (resultFormat == "arrow" && SPARK_ENGINE_RUNTIME_VERSION >= "3.3")) withJdbcStatement() { statement => Map( "INTERVAL 2022 YEAR" -> Tuple2("2022-0", "2022 years"), @@ -244,7 +276,7 @@ trait SparkDataTypeTests extends HiveJDBCTestHelper { assert(resultSet.next()) val result = resultSet.getString("col") val metaData = resultSet.getMetaData - if (SPARK_ENGINE_VERSION < "3.2") { + if (SPARK_ENGINE_RUNTIME_VERSION < "3.2") { // for spark 3.1 and backwards assert(result === kv._2._2) assert(metaData.getPrecision(1) === Int.MaxValue) @@ -260,7 +292,9 @@ trait SparkDataTypeTests extends HiveJDBCTestHelper { } test("execute statement - select array") { - assume(resultFormat == "thrift" || (resultFormat == "arrow" && SPARK_ENGINE_VERSION >= "3.2")) + assume( + resultFormat == "thrift" || + (resultFormat == "arrow" && SPARK_ENGINE_RUNTIME_VERSION >= "3.2")) withJdbcStatement() { statement => val resultSet = statement.executeQuery( "SELECT array() AS col1, array(1) AS col2, array(null) AS col3") @@ -278,7 +312,9 @@ trait SparkDataTypeTests extends HiveJDBCTestHelper { } test("execute statement - select map") { - assume(resultFormat == "thrift" || (resultFormat == "arrow" && SPARK_ENGINE_VERSION >= "3.2")) + assume( + resultFormat == "thrift" || + (resultFormat == "arrow" && SPARK_ENGINE_RUNTIME_VERSION >= "3.2")) withJdbcStatement() { statement => val resultSet = statement.executeQuery( "SELECT map() AS col1, map(1, 2, 3, 4) AS col2, map(1, null) AS col3") @@ -296,7 +332,9 @@ trait SparkDataTypeTests extends HiveJDBCTestHelper { } test("execute statement - select struct") { - assume(resultFormat == "thrift" || (resultFormat == "arrow" && SPARK_ENGINE_VERSION >= "3.2")) + assume( + resultFormat == "thrift" || + (resultFormat == "arrow" && SPARK_ENGINE_RUNTIME_VERSION >= "3.2")) withJdbcStatement() { statement => val resultSet = statement.executeQuery( "SELECT struct('1', '2') AS col1," + @@ -315,15 +353,4 @@ trait SparkDataTypeTests extends HiveJDBCTestHelper { assert(metaData.getScale(2) == 0) } } - - def sparkEngineMajorMinorVersion: SemanticVersion = { - var sparkRuntimeVer = "" - withJdbcStatement() { stmt => - val result = stmt.executeQuery("SELECT version()") - assert(result.next()) - sparkRuntimeVer = result.getString(1) - assert(!result.next()) - } - SemanticVersion(sparkRuntimeVer) - } } diff --git a/kyuubi-common/src/test/scala/org/apache/kyuubi/operation/SparkMetadataTests.scala b/kyuubi-common/src/test/scala/org/apache/kyuubi/operation/SparkMetadataTests.scala index 4faf5bba4ff..97099ce4708 100644 --- a/kyuubi-common/src/test/scala/org/apache/kyuubi/operation/SparkMetadataTests.scala +++ b/kyuubi-common/src/test/scala/org/apache/kyuubi/operation/SparkMetadataTests.scala @@ -17,11 +17,6 @@ package org.apache.kyuubi.operation -import java.sql.{DatabaseMetaData, ResultSet, SQLException, SQLFeatureNotSupportedException} - -import scala.util.Random - -import org.apache.kyuubi.{KYUUBI_VERSION, KyuubiSQLException} import org.apache.kyuubi.operation.meta.ResultSetSchemaConstant._ // For both `in-memory` and `hive` external catalog @@ -292,186 +287,4 @@ trait SparkMetadataTests extends HiveJDBCTestHelper { assert(typeInfo.getInt(NUM_PREC_RADIX) === 0) } } - - test("audit Kyuubi Hive JDBC connection common MetaData") { - withJdbcStatement() { statement => - val metaData = statement.getConnection.getMetaData - Seq( - () => metaData.allProceduresAreCallable(), - () => metaData.getURL, - () => metaData.getUserName, - () => metaData.isReadOnly, - () => metaData.nullsAreSortedHigh, - () => metaData.nullsAreSortedLow, - () => metaData.nullsAreSortedAtStart(), - () => metaData.nullsAreSortedAtEnd(), - () => metaData.usesLocalFiles(), - () => metaData.usesLocalFilePerTable(), - () => metaData.supportsMixedCaseIdentifiers(), - () => metaData.supportsMixedCaseQuotedIdentifiers(), - () => metaData.storesUpperCaseIdentifiers(), - () => metaData.storesUpperCaseQuotedIdentifiers(), - () => metaData.storesLowerCaseIdentifiers(), - () => metaData.storesLowerCaseQuotedIdentifiers(), - () => metaData.storesMixedCaseIdentifiers(), - () => metaData.storesMixedCaseQuotedIdentifiers(), - () => metaData.nullPlusNonNullIsNull, - () => metaData.supportsConvert, - () => metaData.supportsTableCorrelationNames, - () => metaData.supportsDifferentTableCorrelationNames, - () => metaData.supportsExpressionsInOrderBy(), - () => metaData.supportsOrderByUnrelated, - () => metaData.supportsGroupByUnrelated, - () => metaData.supportsGroupByBeyondSelect, - () => metaData.supportsLikeEscapeClause, - () => metaData.supportsMultipleTransactions, - () => metaData.supportsMinimumSQLGrammar, - () => metaData.supportsCoreSQLGrammar, - () => metaData.supportsExtendedSQLGrammar, - () => metaData.supportsANSI92EntryLevelSQL, - () => metaData.supportsANSI92IntermediateSQL, - () => metaData.supportsANSI92FullSQL, - () => metaData.supportsIntegrityEnhancementFacility, - () => metaData.isCatalogAtStart, - () => metaData.supportsSubqueriesInComparisons, - () => metaData.supportsSubqueriesInExists, - () => metaData.supportsSubqueriesInIns, - () => metaData.supportsSubqueriesInQuantifieds, - // Spark support this, see https://issues.apache.org/jira/browse/SPARK-18455 - () => metaData.supportsCorrelatedSubqueries, - () => metaData.supportsOpenCursorsAcrossCommit, - () => metaData.supportsOpenCursorsAcrossRollback, - () => metaData.supportsOpenStatementsAcrossCommit, - () => metaData.supportsOpenStatementsAcrossRollback, - () => metaData.getMaxBinaryLiteralLength, - () => metaData.getMaxCharLiteralLength, - () => metaData.getMaxColumnsInGroupBy, - () => metaData.getMaxColumnsInIndex, - () => metaData.getMaxColumnsInOrderBy, - () => metaData.getMaxColumnsInSelect, - () => metaData.getMaxColumnsInTable, - () => metaData.getMaxConnections, - () => metaData.getMaxCursorNameLength, - () => metaData.getMaxIndexLength, - () => metaData.getMaxSchemaNameLength, - () => metaData.getMaxProcedureNameLength, - () => metaData.getMaxCatalogNameLength, - () => metaData.getMaxRowSize, - () => metaData.doesMaxRowSizeIncludeBlobs, - () => metaData.getMaxStatementLength, - () => metaData.getMaxStatements, - () => metaData.getMaxTableNameLength, - () => metaData.getMaxTablesInSelect, - () => metaData.getMaxUserNameLength, - () => metaData.supportsTransactionIsolationLevel(1), - () => metaData.supportsDataDefinitionAndDataManipulationTransactions, - () => metaData.supportsDataManipulationTransactionsOnly, - () => metaData.dataDefinitionCausesTransactionCommit, - () => metaData.dataDefinitionIgnoredInTransactions, - () => metaData.getColumnPrivileges("", "%", "%", "%"), - () => metaData.getTablePrivileges("", "%", "%"), - () => metaData.getBestRowIdentifier("", "%", "%", 0, true), - () => metaData.getVersionColumns("", "%", "%"), - () => metaData.getExportedKeys("", "default", ""), - () => metaData.supportsResultSetConcurrency(ResultSet.TYPE_FORWARD_ONLY, 2), - () => metaData.ownUpdatesAreVisible(ResultSet.TYPE_FORWARD_ONLY), - () => metaData.ownDeletesAreVisible(ResultSet.TYPE_FORWARD_ONLY), - () => metaData.ownInsertsAreVisible(ResultSet.TYPE_FORWARD_ONLY), - () => metaData.othersUpdatesAreVisible(ResultSet.TYPE_FORWARD_ONLY), - () => metaData.othersDeletesAreVisible(ResultSet.TYPE_FORWARD_ONLY), - () => metaData.othersInsertsAreVisible(ResultSet.TYPE_FORWARD_ONLY), - () => metaData.updatesAreDetected(ResultSet.TYPE_FORWARD_ONLY), - () => metaData.deletesAreDetected(ResultSet.TYPE_FORWARD_ONLY), - () => metaData.insertsAreDetected(ResultSet.TYPE_FORWARD_ONLY), - () => metaData.supportsNamedParameters(), - () => metaData.supportsMultipleOpenResults, - () => metaData.supportsGetGeneratedKeys, - () => metaData.getSuperTypes("", "%", "%"), - () => metaData.getSuperTables("", "%", "%"), - () => metaData.getAttributes("", "%", "%", "%"), - () => metaData.getResultSetHoldability, - () => metaData.locatorsUpdateCopy, - () => metaData.supportsStatementPooling, - () => metaData.getRowIdLifetime, - () => metaData.supportsStoredFunctionsUsingCallSyntax, - () => metaData.autoCommitFailureClosesAllResultSets, - () => metaData.getFunctionColumns("", "%", "%", "%"), - () => metaData.getPseudoColumns("", "%", "%", "%"), - () => metaData.generatedKeyAlwaysReturned).foreach { func => - val e = intercept[SQLFeatureNotSupportedException](func()) - assert(e.getMessage === "Method not supported") - } - - assert(metaData.allTablesAreSelectable) - assert(metaData.getClientInfoProperties.next) - assert(metaData.getDriverName === "Kyuubi Project Hive JDBC Client" || - metaData.getDriverName === "Kyuubi Project Hive JDBC Shaded Client") - assert(metaData.getDriverVersion === KYUUBI_VERSION) - assert( - metaData.getIdentifierQuoteString === " ", - "This method returns a space \" \" if identifier quoting is not supported") - assert(metaData.getNumericFunctions === "") - assert(metaData.getStringFunctions === "") - assert(metaData.getSystemFunctions === "") - assert(metaData.getTimeDateFunctions === "") - assert(metaData.getSearchStringEscape === "\\") - assert(metaData.getExtraNameCharacters === "") - assert(metaData.supportsAlterTableWithAddColumn()) - assert(!metaData.supportsAlterTableWithDropColumn()) - assert(metaData.supportsColumnAliasing()) - assert(metaData.supportsGroupBy) - assert(!metaData.supportsMultipleResultSets) - assert(!metaData.supportsNonNullableColumns) - assert(metaData.supportsOuterJoins) - assert(metaData.supportsFullOuterJoins) - assert(metaData.supportsLimitedOuterJoins) - assert(metaData.getSchemaTerm === "database") - assert(metaData.getProcedureTerm === "UDF") - assert(metaData.getCatalogTerm === "catalog") - assert(metaData.getCatalogSeparator === ".") - assert(metaData.supportsSchemasInDataManipulation) - assert(!metaData.supportsSchemasInProcedureCalls) - assert(metaData.supportsSchemasInTableDefinitions) - assert(!metaData.supportsSchemasInIndexDefinitions) - assert(!metaData.supportsSchemasInPrivilegeDefinitions) - assert(metaData.supportsCatalogsInDataManipulation) - assert(metaData.supportsCatalogsInProcedureCalls) - assert(metaData.supportsCatalogsInTableDefinitions) - assert(metaData.supportsCatalogsInIndexDefinitions) - assert(metaData.supportsCatalogsInPrivilegeDefinitions) - assert(!metaData.supportsPositionedDelete) - assert(!metaData.supportsPositionedUpdate) - assert(!metaData.supportsSelectForUpdate) - assert(!metaData.supportsStoredProcedures) - // This is actually supported, but hive jdbc package return false - assert(!metaData.supportsUnion) - assert(metaData.supportsUnionAll) - assert(metaData.getMaxColumnNameLength === 128) - assert(metaData.getDefaultTransactionIsolation === java.sql.Connection.TRANSACTION_NONE) - assert(!metaData.supportsTransactions) - assert(!metaData.getProcedureColumns("", "%", "%", "%").next()) - val e1 = intercept[SQLException] { - metaData.getPrimaryKeys("", "default", "src").next() - } - assert(e1.getMessage.contains(KyuubiSQLException.featureNotSupported().getMessage)) - assert(!metaData.getImportedKeys("", "default", "").next()) - - val e2 = intercept[SQLException] { - metaData.getCrossReference("", "default", "src", "", "default", "src2").next() - } - assert(e2.getMessage.contains(KyuubiSQLException.featureNotSupported().getMessage)) - assert(!metaData.getIndexInfo("", "default", "src", true, true).next()) - - assert(metaData.supportsResultSetType(new Random().nextInt())) - assert(!metaData.supportsBatchUpdates) - assert(!metaData.getUDTs(",", "%", "%", null).next()) - assert(!metaData.supportsSavepoints) - assert(!metaData.supportsResultSetHoldability(ResultSet.HOLD_CURSORS_OVER_COMMIT)) - assert(metaData.getJDBCMajorVersion === 3) - assert(metaData.getJDBCMinorVersion === 0) - assert(metaData.getSQLStateType === DatabaseMetaData.sqlStateSQL) - assert(metaData.getMaxLogicalLobSize === 0) - assert(!metaData.supportsRefCursors) - } - } } diff --git a/kyuubi-common/src/test/scala/org/apache/kyuubi/operation/SparkQueryTests.scala b/kyuubi-common/src/test/scala/org/apache/kyuubi/operation/SparkQueryTests.scala index e297e6281ae..ff8b124813c 100644 --- a/kyuubi-common/src/test/scala/org/apache/kyuubi/operation/SparkQueryTests.scala +++ b/kyuubi-common/src/test/scala/org/apache/kyuubi/operation/SparkQueryTests.scala @@ -28,7 +28,6 @@ import org.apache.hive.service.rpc.thrift.{TExecuteStatementReq, TFetchResultsRe import org.apache.kyuubi.{KYUUBI_VERSION, Utils} import org.apache.kyuubi.config.KyuubiConf -import org.apache.kyuubi.util.SparkVersionUtil.isSparkVersionAtLeast trait SparkQueryTests extends SparkDataTypeTests with HiveJDBCTestHelper { @@ -187,7 +186,7 @@ trait SparkQueryTests extends SparkDataTypeTests with HiveJDBCTestHelper { withJdbcStatement("t") { statement => try { val assertTableOrViewNotfound: (Exception, String) => Unit = (e, tableName) => { - if (isSparkVersionAtLeast("3.4")) { + if (SPARK_ENGINE_RUNTIME_VERSION >= "3.4") { assert(e.getMessage.contains("[TABLE_OR_VIEW_NOT_FOUND]")) assert(e.getMessage.contains(s"The table or view `$tableName` cannot be found.")) } else { @@ -433,13 +432,13 @@ trait SparkQueryTests extends SparkDataTypeTests with HiveJDBCTestHelper { expectedFormat = "thrift") checkStatusAndResultSetFormatHint( sql = "set kyuubi.operation.result.format=arrow", - expectedFormat = "arrow") + expectedFormat = "thrift") checkStatusAndResultSetFormatHint( sql = "SELECT 1", expectedFormat = "arrow") checkStatusAndResultSetFormatHint( sql = "set kyuubi.operation.result.format=thrift", - expectedFormat = "thrift") + expectedFormat = "arrow") checkStatusAndResultSetFormatHint( sql = "set kyuubi.operation.result.format", expectedFormat = "thrift") diff --git a/kyuubi-common/src/test/scala/org/apache/kyuubi/operation/log/OperationLogSuite.scala b/kyuubi-common/src/test/scala/org/apache/kyuubi/operation/log/OperationLogSuite.scala index 758eeeeafaa..fe3cbc7fc75 100644 --- a/kyuubi-common/src/test/scala/org/apache/kyuubi/operation/log/OperationLogSuite.scala +++ b/kyuubi-common/src/test/scala/org/apache/kyuubi/operation/log/OperationLogSuite.scala @@ -61,10 +61,10 @@ class OperationLogSuite extends KyuubiFunSuite { assert(!Files.exists(logFile)) OperationLog.setCurrentOperationLog(operationLog) - assert(OperationLog.getCurrentOperationLog === operationLog) + assert(OperationLog.getCurrentOperationLog === Some(operationLog)) OperationLog.removeCurrentOperationLog() - assert(OperationLog.getCurrentOperationLog === null) + assert(OperationLog.getCurrentOperationLog.isEmpty) operationLog.write(msg1 + "\n") assert(Files.exists(logFile)) diff --git a/kyuubi-common/src/test/scala/org/apache/kyuubi/service/authentication/InternalSecurityAccessorSuite.scala b/kyuubi-common/src/test/scala/org/apache/kyuubi/service/authentication/InternalSecurityAccessorSuite.scala index e6c4c850690..e92ac7e6185 100644 --- a/kyuubi-common/src/test/scala/org/apache/kyuubi/service/authentication/InternalSecurityAccessorSuite.scala +++ b/kyuubi-common/src/test/scala/org/apache/kyuubi/service/authentication/InternalSecurityAccessorSuite.scala @@ -22,9 +22,8 @@ import org.apache.kyuubi.config.KyuubiConf class InternalSecurityAccessorSuite extends KyuubiFunSuite { private val conf = KyuubiConf() - conf.set( - KyuubiConf.ENGINE_SECURITY_SECRET_PROVIDER, - classOf[UserDefinedEngineSecuritySecretProvider].getCanonicalName) + .set(KyuubiConf.ENGINE_SECURITY_SECRET_PROVIDER, "simple") + .set(KyuubiConf.SIMPLE_SECURITY_SECRET_PROVIDER_PROVIDER_SECRET, "ENGINE____SECRET") test("test encrypt/decrypt, issue token/auth token") { Seq("AES/CBC/PKCS5PADDING", "AES/CTR/NoPadding").foreach { cipher => diff --git a/kyuubi-common/src/test/scala/org/apache/kyuubi/service/authentication/LdapAtnProviderSuite.scala b/kyuubi-common/src/test/scala/org/apache/kyuubi/service/authentication/LdapAtnProviderSuite.scala new file mode 100644 index 00000000000..c3c67e42115 --- /dev/null +++ b/kyuubi-common/src/test/scala/org/apache/kyuubi/service/authentication/LdapAtnProviderSuite.scala @@ -0,0 +1,493 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.service.authentication + +import com.unboundid.ldap.sdk.Entry + +import org.apache.kyuubi.service.authentication.ldap.{LdapAuthenticationTestCase, User} + +class LdapAtnProviderSuite extends WithLdapServer { + + override protected val ldapBaseDn: Array[String] = Array( + "dc=example,dc=com", + "cn=microsoft,ou=schema") + + private val GROUP1_NAME = "group1" + private val GROUP2_NAME = "group2" + private val GROUP3_NAME = "group3" + private val GROUP4_NAME = "group4" + + private val GROUP_ADMINS_NAME = "admins" + private val GROUP_TEAM1_NAME = "team1" + private val GROUP_TEAM2_NAME = "team2" + private val GROUP_RESOURCE1_NAME = "resource1" + private val GROUP_RESOURCE2_NAME = "resource2" + + private val USER1 = + User.useIdForPassword(id = "user1", dn = "uid=user1,ou=People,dc=example,dc=com") + + private val USER2 = + User.useIdForPassword(id = "user2", dn = "uid=user2,ou=People,dc=example,dc=com") + + private val USER3 = + User.useIdForPassword(id = "user3", dn = "cn=user3,ou=People,dc=example,dc=com") + + private val USER4 = + User.useIdForPassword(id = "user4", dn = "cn=user4,ou=People,dc=example,dc=com") + + private val ENGINEER_1 = User( + id = "engineer1", + dn = "sAMAccountName=engineer1,ou=Engineering,dc=ad,dc=example,dc=com", + password = "engineer1-password") + + private val ENGINEER_2 = User( + id = "engineer2", + dn = "sAMAccountName=engineer2,ou=Engineering,dc=ad,dc=example,dc=com", + password = "engineer2-password") + + private val MANAGER_1 = User( + id = "manager1", + dn = "sAMAccountName=manager1,ou=Management,dc=ad,dc=example,dc=com", + password = "manager1-password") + + private val MANAGER_2 = User( + id = "manager2", + dn = "sAMAccountName=manager2,ou=Management,dc=ad,dc=example,dc=com", + password = "manager2-password") + + private val ADMIN_1 = User( + id = "admin1", + dn = "sAMAccountName=admin1,ou=Administration,dc=ad,dc=example,dc=com", + password = "admin1-password") + + private var testCase: LdapAuthenticationTestCase = _ + + private def defaultBuilder = LdapAuthenticationTestCase.builder.ldapUrl(ldapUrl) + + override def beforeAll(): Unit = { + super.beforeAll() + ldapServer.add(new Entry( + "dn: dc=example,dc=com", + "dc: example", + "objectClass: top", + "objectClass: domain")) + + applyLDIF("ldap/example.com.ldif") + applyLDIF("ldap/microsoft.schema.ldif") + applyLDIF("ldap/ad.example.com.ldif") + } + + test("In-Memory LDAP server is started") { + assert(ldapServer.getListenPort > 0) + } + + test("UserBindPositiveWithShortname") { + testCase = defaultBuilder + .userDNPatterns("uid=%s,ou=People,dc=example,dc=com") + .groupDNPatterns("uid=%s,ou=Groups,dc=example,dc=com") + .build + testCase.assertAuthenticatePasses(USER1.credentialsWithId) + testCase.assertAuthenticatePasses(USER2.credentialsWithId) + } + + test("UserBindPositiveWithShortnameOldConfig") { + testCase = defaultBuilder + .baseDN("ou=People,dc=example,dc=com") + .build + testCase.assertAuthenticatePasses(USER1.credentialsWithId) + testCase.assertAuthenticatePasses(USER2.credentialsWithId) + } + + test("UserBindNegativeWithShortname") { + testCase = defaultBuilder + .userDNPatterns("uid=%s,ou=People,dc=example,dc=com") + .groupDNPatterns("uid=%s,ou=Groups,dc=example,dc=com") + .build + testCase.assertAuthenticateFailsUsingWrongPassword(USER1.credentialsWithId) + testCase.assertAuthenticateFailsUsingWrongPassword(USER2.credentialsWithId) + } + + test("UserBindNegativeWithShortnameOldConfig") { + testCase = defaultBuilder + .baseDN("ou=People,dc=example,dc=com") + .build + testCase.assertAuthenticateFailsUsingWrongPassword(USER1.credentialsWithId) + testCase.assertAuthenticateFails(USER1.dn, USER2.password) + testCase.assertAuthenticateFailsUsingWrongPassword(USER2.credentialsWithId) + } + + test("UserBindPositiveWithDN") { + testCase = defaultBuilder + .userDNPatterns("uid=%s,ou=People,dc=example,dc=com") + .groupDNPatterns("uid=%s,ou=Groups,dc=example,dc=com") + .build + testCase.assertAuthenticatePasses(USER1.credentialsWithDn) + testCase.assertAuthenticatePasses(USER2.credentialsWithDn) + } + + test("UserBindPositiveWithDNOldConfig") { + testCase = defaultBuilder + .baseDN("ou=People,dc=example,dc=com") + .build + testCase.assertAuthenticatePasses(USER1.credentialsWithDn) + testCase.assertAuthenticatePasses(USER2.credentialsWithDn) + } + + test("UserBindPositiveWithDNWrongOldConfig") { + testCase = defaultBuilder + .baseDN("ou=DummyPeople,dc=example,dc=com") + .build + testCase.assertAuthenticatePasses(USER1.credentialsWithDn) + testCase.assertAuthenticatePasses(USER2.credentialsWithDn) + } + + test("UserBindPositiveWithDNWrongConfig") { + testCase = defaultBuilder + .userDNPatterns("uid=%s,ou=DummyPeople,dc=example,dc=com") + .groupDNPatterns("uid=%s,ou=DummyGroups,dc=example,dc=com") + .build + testCase.assertAuthenticatePasses(USER1.credentialsWithDn) + testCase.assertAuthenticatePasses(USER2.credentialsWithDn) + } + + test("UserBindPositiveWithDNBlankConfig") { + testCase = defaultBuilder + .userDNPatterns(" ") + .groupDNPatterns(" ") + .build + testCase.assertAuthenticatePasses(USER1.credentialsWithDn) + testCase.assertAuthenticatePasses(USER2.credentialsWithDn) + } + + test("UserBindPositiveWithDNBlankOldConfig") { + testCase = defaultBuilder.baseDN("").build + testCase.assertAuthenticatePasses(USER1.credentialsWithDn) + testCase.assertAuthenticatePasses(USER2.credentialsWithDn) + } + + test("UserBindNegativeWithDN") { + testCase = defaultBuilder + .userDNPatterns("uid=%s,ou=People,dc=example,dc=com") + .groupDNPatterns("uid=%s,ou=Groups,dc=example,dc=com") + .build + testCase.assertAuthenticateFailsUsingWrongPassword(USER1.credentialsWithDn) + testCase.assertAuthenticateFails(USER1.dn, USER2.password) + testCase.assertAuthenticateFailsUsingWrongPassword(USER2.credentialsWithDn) + } + + test("UserBindNegativeWithDNOldConfig") { + testCase = defaultBuilder + .baseDN("ou=People,dc=example,dc=com") + .build + testCase.assertAuthenticateFailsUsingWrongPassword(USER1.credentialsWithDn) + testCase.assertAuthenticateFails(USER1.dn, USER2.password) + testCase.assertAuthenticateFailsUsingWrongPassword(USER2.credentialsWithDn) + } + + test("UserFilterPositive") { + testCase = defaultBuilder + .userDNPatterns("uid=%s,ou=People,dc=example,dc=com") + .userFilters(USER1.id) + .build + testCase.assertAuthenticatePasses(USER1.credentialsWithId) + testCase.assertAuthenticatePasses(USER1.credentialsWithDn) + testCase = defaultBuilder + .userDNPatterns("uid=%s,ou=People,dc=example,dc=com") + .userFilters(USER2.id) + .build + testCase.assertAuthenticatePasses(USER2.credentialsWithId) + testCase.assertAuthenticatePasses(USER2.credentialsWithDn) + testCase = defaultBuilder + .userDNPatterns("uid=%s,ou=People,dc=example,dc=com") + .userFilters(USER1.id, USER2.id) + .build + testCase.assertAuthenticatePasses(USER1.credentialsWithId) + testCase.assertAuthenticatePasses(USER1.credentialsWithDn) + testCase.assertAuthenticatePasses(USER2.credentialsWithId) + testCase.assertAuthenticatePasses(USER2.credentialsWithDn) + } + + test("UserFilterNegative") { + testCase = defaultBuilder + .userDNPatterns("uid=%s,ou=People,dc=example,dc=com") + .userFilters(USER2.id) + .build + testCase.assertAuthenticateFails(USER1.credentialsWithId) + testCase.assertAuthenticateFails(USER1.credentialsWithDn) + testCase = defaultBuilder + .userDNPatterns("uid=%s,ou=People,dc=example,dc=com") + .userFilters(USER1.id) + .build + testCase.assertAuthenticateFails(USER2.credentialsWithId) + testCase.assertAuthenticateFails(USER2.credentialsWithDn) + testCase = defaultBuilder + .userDNPatterns("uid=%s,ou=People,dc=example,dc=com") + .userFilters(USER3.id) + .build + testCase.assertAuthenticateFails(USER1.credentialsWithId) + testCase.assertAuthenticateFails(USER2.credentialsWithId) + } + + test("GroupFilterPositive") { + testCase = defaultBuilder + .userDNPatterns("uid=%s,ou=People,dc=example,dc=com") + .groupDNPatterns("uid=%s,ou=Groups,dc=example,dc=com") + .groupFilters(GROUP1_NAME, GROUP2_NAME) + .build + testCase.assertAuthenticatePasses(USER1.credentialsWithId) + testCase.assertAuthenticatePasses(USER1.credentialsWithDn) + testCase.assertAuthenticatePasses(USER2.credentialsWithId) + testCase.assertAuthenticatePasses(USER2.credentialsWithDn) + testCase = defaultBuilder + .userDNPatterns("uid=%s,ou=People,dc=example,dc=com") + .groupDNPatterns("uid=%s,ou=Groups,dc=example,dc=com") + .groupFilters(GROUP2_NAME) + .build + testCase.assertAuthenticatePasses(USER2.credentialsWithId) + testCase.assertAuthenticatePasses(USER2.credentialsWithDn) + } + + test("GroupFilterNegative") { + testCase = defaultBuilder + .userDNPatterns("uid=%s,ou=People,dc=example,dc=com") + .groupDNPatterns("uid=%s,ou=Groups,dc=example,dc=com") + .groupFilters(GROUP2_NAME) + .build + testCase.assertAuthenticateFails(USER1.credentialsWithId) + testCase.assertAuthenticateFails(USER1.credentialsWithDn) + testCase = defaultBuilder + .userDNPatterns("uid=%s,ou=People,dc=example,dc=com") + .groupDNPatterns("uid=%s,ou=Groups,dc=example,dc=com") + .groupFilters(GROUP1_NAME) + .build + testCase.assertAuthenticateFails(USER2.credentialsWithId) + testCase.assertAuthenticateFails(USER2.credentialsWithDn) + } + + test("UserAndGroupFilterPositive") { + testCase = defaultBuilder + .userDNPatterns("uid=%s,ou=People,dc=example,dc=com") + .groupDNPatterns("uid=%s,ou=Groups,dc=example,dc=com") + .userFilters(USER1.id, USER2.id) + .groupFilters(GROUP1_NAME, GROUP2_NAME) + .build + testCase.assertAuthenticatePasses(USER1.credentialsWithId) + testCase.assertAuthenticatePasses(USER1.credentialsWithDn) + testCase.assertAuthenticatePasses(USER2.credentialsWithId) + testCase.assertAuthenticatePasses(USER2.credentialsWithDn) + } + + test("UserAndGroupFilterNegative") { + testCase = defaultBuilder + .userDNPatterns("uid=%s,ou=People,dc=example,dc=com") + .groupDNPatterns("uid=%s,ou=Groups,dc=example,dc=com") + .userFilters(USER1.id, USER2.id) + .groupFilters(GROUP3_NAME, GROUP3_NAME) + .build + testCase.assertAuthenticateFails(USER2.credentialsWithDn) + testCase.assertAuthenticateFails(USER2.credentialsWithId) + testCase.assertAuthenticateFails(USER3.credentialsWithDn) + testCase.assertAuthenticateFails(USER3.credentialsWithId) + } + + test("CustomQueryPositive") { + testCase = defaultBuilder + .baseDN("ou=People,dc=example,dc=com") + .userDNPatterns("cn=%s,ou=People,dc=example,dc=com", "uid=%s,ou=People,dc=example,dc=com") + .groupDNPatterns("cn=%s,ou=People,dc=example,dc=com") + .customQuery(String.format("(&(objectClass=person)(|(uid=%s)(uid=%s)))", USER1.id, USER4.id)) + .build + testCase.assertAuthenticatePasses(USER1.credentialsWithId) + testCase.assertAuthenticatePasses(USER1.credentialsWithDn) + testCase.assertAuthenticatePasses(USER4.credentialsWithId) + testCase.assertAuthenticatePasses(USER4.credentialsWithDn) + testCase = defaultBuilder + .baseDN("ou=People,dc=example,dc=com") + .customQuery("(&(objectClass=person)(uid=%s))") + .build + testCase.assertAuthenticatePasses(USER1.credentialsWithId) + testCase.assertAuthenticatePasses(USER2.credentialsWithId) + } + + test("CustomQueryNegative") { + testCase = defaultBuilder + .baseDN("ou=People,dc=example,dc=com") + .customQuery("(&(objectClass=person)(cn=%s))") + .build + testCase.assertAuthenticateFails(USER2.credentialsWithDn) + testCase.assertAuthenticateFails(USER2.credentialsWithId) + } + + /** + * Test to test the LDAP Atn to use a custom LDAP query that returns + * a) A set of group DNs + * b) A combination of group(s) DN and user DN + * LDAP atn is expected to extract the members of the group using the attribute value for + * `kyuubi.authentication.ldap.userMembershipKey` + */ + test("CustomQueryWithGroupsPositive") { + testCase = defaultBuilder + .baseDN("dc=example,dc=com") + .userDNPatterns("cn=%s,ou=People,dc=example,dc=com", "uid=%s,ou=People,dc=example,dc=com") + .customQuery(s"(&(objectClass=groupOfNames)(|(cn=$GROUP1_NAME)(cn=$GROUP2_NAME)))") + .build + testCase.assertAuthenticatePasses(USER1.credentialsWithId) + testCase.assertAuthenticatePasses(USER1.credentialsWithDn) + testCase.assertAuthenticatePasses(USER2.credentialsWithId) + testCase.assertAuthenticatePasses(USER2.credentialsWithDn) + // the following test uses a query that returns a group and a user entry. + // the ldap atn should use the groupMembershipKey to identify the users for the returned group + // and the authentication should succeed for the users of that group as well as the lone user4 + // in this case + testCase = defaultBuilder + .baseDN("dc=example,dc=com") + .userDNPatterns("cn=%s,ou=People,dc=example,dc=com", "uid=%s,ou=People,dc=example,dc=com") + .customQuery( + s"(|(&(objectClass=groupOfNames)(cn=$GROUP1_NAME))(&(objectClass=person)(sn=${USER4.id})))") + .build + testCase.assertAuthenticatePasses(USER1.credentialsWithId) + testCase.assertAuthenticatePasses(USER1.credentialsWithDn) + testCase.assertAuthenticatePasses(USER4.credentialsWithId) + testCase.assertAuthenticatePasses(USER4.credentialsWithDn) + testCase = defaultBuilder + .baseDN("dc=example,dc=com") + .userDNPatterns("cn=%s,ou=People,dc=example,dc=com", "uid=%s,ou=People,dc=example,dc=com") + .groupMembershipKey("uniqueMember") + .customQuery(s"(&(objectClass=groupOfUniqueNames)(cn=$GROUP4_NAME))") + .build + testCase.assertAuthenticatePasses(USER4.credentialsWithId) + testCase.assertAuthenticatePasses(USER4.credentialsWithDn) + } + + test("CustomQueryWithGroupsNegative") { + testCase = defaultBuilder + .baseDN("dc=example,dc=com") + .userDNPatterns("cn=%s,ou=People,dc=example,dc=com", "uid=%s,ou=People,dc=example,dc=com") + .customQuery(s"(&(objectClass=groupOfNames)(|(cn=$GROUP1_NAME)(cn=$GROUP2_NAME)))") + .build + testCase.assertAuthenticateFails(USER3.credentialsWithDn) + testCase.assertAuthenticateFails(USER3.credentialsWithId) + } + + test("GroupFilterPositiveWithCustomGUID") { + testCase = defaultBuilder + .userDNPatterns("cn=%s,ou=People,dc=example,dc=com") + .groupDNPatterns("cn=%s,ou=Groups,dc=example,dc=com") + .groupFilters(GROUP3_NAME) + .guidKey("cn") + .build + testCase.assertAuthenticatePasses(USER3.credentialsWithId) + testCase.assertAuthenticatePasses(USER3.credentialsWithDn) + } + + test("GroupFilterPositiveWithCustomAttributes") { + testCase = defaultBuilder + .userDNPatterns("cn=%s,ou=People,dc=example,dc=com") + .groupDNPatterns("cn=%s,ou=Groups,dc=example,dc=com") + .groupFilters(GROUP4_NAME) + .guidKey("cn") + .groupMembershipKey("uniqueMember") + .groupClassKey("groupOfUniqueNames") + .build + testCase.assertAuthenticatePasses(USER4.credentialsWithId) + testCase.assertAuthenticatePasses(USER4.credentialsWithDn) + } + + test("DirectUserMembershipGroupFilterPositive") { + testCase = defaultBuilder + .userDNPatterns( + "sAMAccountName=%s,ou=Engineering,dc=ad,dc=example,dc=com", + "sAMAccountName=%s,ou=Management,dc=ad,dc=example,dc=com") + .groupDNPatterns( + "sAMAccountName=%s,ou=Teams,dc=ad,dc=example,dc=com", + "sAMAccountName=%s,ou=Resources,dc=ad,dc=example,dc=com") + .groupFilters(GROUP_TEAM1_NAME, GROUP_TEAM2_NAME, GROUP_RESOURCE1_NAME, GROUP_RESOURCE2_NAME) + .guidKey("sAMAccountName") + .userMembershipKey("memberOf") + .build + testCase.assertAuthenticatePasses(ENGINEER_1.credentialsWithId) + testCase.assertAuthenticatePasses(ENGINEER_2.credentialsWithId) + testCase.assertAuthenticatePasses(MANAGER_1.credentialsWithId) + testCase.assertAuthenticatePasses(MANAGER_2.credentialsWithId) + } + + test("DirectUserMembershipGroupFilterNegative") { + testCase = defaultBuilder + .userDNPatterns( + "sAMAccountName=%s,ou=Engineering,dc=ad,dc=example,dc=com", + "sAMAccountName=%s,ou=Management,dc=ad,dc=example,dc=com") + .groupDNPatterns("cn=%s,ou=Teams,dc=ad,dc=example,dc=com") + .groupFilters(GROUP_TEAM1_NAME) + .guidKey("sAMAccountName") + .userMembershipKey("memberOf") + .build + testCase.assertAuthenticateFails(ENGINEER_2.credentialsWithId) + testCase.assertAuthenticateFails(MANAGER_2.credentialsWithId) + } + + test("DirectUserMembershipGroupFilterNegativeWithoutUserBases") { + testCase = defaultBuilder + .groupDNPatterns("cn=%s,ou=Teams,dc=ad,dc=example,dc=com") + .groupFilters(GROUP_TEAM1_NAME) + .guidKey("sAMAccountName") + .userMembershipKey("memberOf") + .build + testCase.assertAuthenticateFails(ENGINEER_1.credentialsWithId) + testCase.assertAuthenticateFails(ENGINEER_2.credentialsWithId) + testCase.assertAuthenticateFails(MANAGER_1.credentialsWithId) + testCase.assertAuthenticateFails(MANAGER_2.credentialsWithId) + } + + test("DirectUserMembershipGroupFilterWithDNCredentials") { + testCase = defaultBuilder + .userDNPatterns("sAMAccountName=%s,ou=Engineering,dc=ad,dc=example,dc=com") + .groupDNPatterns("cn=%s,ou=Teams,dc=ad,dc=example,dc=com") + .groupFilters(GROUP_TEAM1_NAME) + .guidKey("sAMAccountName") + .userMembershipKey("memberOf") + .build + testCase.assertAuthenticatePasses(ENGINEER_1.credentialsWithDn) + testCase.assertAuthenticateFails(MANAGER_1.credentialsWithDn) + } + + test("DirectUserMembershipGroupFilterWithDifferentGroupClassKey") { + testCase = defaultBuilder + .userDNPatterns("sAMAccountName=%s,ou=Administration,dc=ad,dc=example,dc=com") + .groupDNPatterns("cn=%s,ou=Administration,dc=ad,dc=example,dc=com") + .groupFilters(GROUP_ADMINS_NAME).guidKey("sAMAccountName") + .userMembershipKey("memberOf") + .groupClassKey("groupOfUniqueNames") + .build + testCase.assertAuthenticatePasses(ADMIN_1.credentialsWithId) + testCase.assertAuthenticateFails(ENGINEER_1.credentialsWithId) + testCase.assertAuthenticateFails(MANAGER_1.credentialsWithDn) + } + + test("DirectUserMembershipGroupFilterNegativeWithWrongGroupClassKey") { + testCase = defaultBuilder + .userDNPatterns("sAMAccountName=%s,ou=Administration,dc=ad,dc=example,dc=com") + .groupDNPatterns("cn=%s,ou=Administration,dc=ad,dc=example,dc=com") + .groupFilters(GROUP_ADMINS_NAME).guidKey("sAMAccountName") + .userMembershipKey("memberOf") + .groupClassKey("wrongClass") + .build + testCase.assertAuthenticateFails(ADMIN_1.credentialsWithId) + testCase.assertAuthenticateFails(ENGINEER_1.credentialsWithId) + testCase.assertAuthenticateFails(MANAGER_1.credentialsWithDn) + } +} diff --git a/kyuubi-common/src/test/scala/org/apache/kyuubi/service/authentication/LdapAuthenticationProviderImplSuite.scala b/kyuubi-common/src/test/scala/org/apache/kyuubi/service/authentication/LdapAuthenticationProviderImplSuite.scala index 63941162865..718fc6f6ebd 100644 --- a/kyuubi-common/src/test/scala/org/apache/kyuubi/service/authentication/LdapAuthenticationProviderImplSuite.scala +++ b/kyuubi-common/src/test/scala/org/apache/kyuubi/service/authentication/LdapAuthenticationProviderImplSuite.scala @@ -17,63 +17,357 @@ package org.apache.kyuubi.service.authentication -import javax.naming.CommunicationException +import javax.naming.NamingException import javax.security.sasl.AuthenticationException +import org.mockito.ArgumentMatchers.{any, anyString, eq => mockEq, isA} +import org.mockito.Mockito._ +import org.scalatestplus.mockito.MockitoSugar.mock + import org.apache.kyuubi.config.KyuubiConf import org.apache.kyuubi.config.KyuubiConf._ +import org.apache.kyuubi.service.authentication.ldap.{DirSearch, DirSearchFactory, LdapSearchFactory} class LdapAuthenticationProviderImplSuite extends WithLdapServer { - override protected val ldapUser: String = "kentyao" - override protected val ldapUserPasswd: String = "kentyao" - private val conf = new KyuubiConf() + private var conf: KyuubiConf = _ + private var factory: DirSearchFactory = _ + private var search: DirSearch = _ + private var auth: LdapAuthenticationProviderImpl = _ - override def beforeAll(): Unit = { - super.beforeAll() + override protected def beforeEach(): Unit = { + super.beforeEach() + conf = new KyuubiConf() conf.set(AUTHENTICATION_LDAP_URL, ldapUrl) + factory = mock[DirSearchFactory] + search = mock[DirSearch] + when(factory.getInstance(any(classOf[KyuubiConf]), anyString, anyString)) + .thenReturn(search) + } + + test("authenticateGivenBlankOrNullPassword") { + Seq("", "\0", null).foreach { pwd => + auth = new LdapAuthenticationProviderImpl(conf, new LdapSearchFactory) + val thrown = intercept[AuthenticationException] { + auth.authenticate("user", pwd) + } + assert(thrown.getMessage.contains("is null or contains blank space")) + } + } + + test("AuthenticateNoUserOrGroupFilter") { + conf.set( + AUTHENTICATION_LDAP_USER_DN_PATTERN, + "cn=%s,ou=Users,dc=mycorp,dc=com:cn=%s,ou=PowerUsers,dc=mycorp,dc=com") + val factory = mock[DirSearchFactory] + lenient + .when(search.findUserDn("user1")) + .thenReturn("cn=user1,ou=PowerUsers,dc=mycorp,dc=com") + when(factory.getInstance(conf, "cn=user1,ou=PowerUsers,dc=mycorp,dc=com", "Blah")) + .thenReturn(search) + when(factory.getInstance(conf, "cn=user1,ou=Users,dc=mycorp,dc=com", "Blah")) + .thenThrow(classOf[AuthenticationException]) + + auth = new LdapAuthenticationProviderImpl(conf, factory) + auth.authenticate("user1", "Blah") + + verify(factory, times(2)).getInstance(isA(classOf[KyuubiConf]), anyString, mockEq("Blah")) + verify(search, atLeastOnce).close() + } + + test("AuthenticateWhenUserFilterPasses") { + conf.set(AUTHENTICATION_LDAP_USER_FILTER.key, "user1,user2") + + when(search.findUserDn("user1")).thenReturn("cn=user1,ou=PowerUsers,dc=mycorp,dc=com") + when(search.findUserDn("user2")).thenReturn("cn=user2,ou=PowerUsers,dc=mycorp,dc=com") + + authenticateUserAndCheckSearchIsClosed("user1") + authenticateUserAndCheckSearchIsClosed("user2") + } + + test("AuthenticateWhenLoginWithDomainAndUserFilterPasses") { + conf.set(AUTHENTICATION_LDAP_USER_FILTER.key, "user1") + + when(search.findUserDn("user1")).thenReturn("cn=user1,ou=PowerUsers,dc=mycorp,dc=com") + + authenticateUserAndCheckSearchIsClosed("user1@mydomain.com") + } + + test("AuthenticateWhenLoginWithDnAndUserFilterPasses") { + conf.set(AUTHENTICATION_LDAP_USER_FILTER.key, "user1") + + when(search.findUserDn("cn=user1,ou=PowerUsers,dc=mycorp,dc=com")) + .thenReturn("cn=user1,ou=PowerUsers,dc=mycorp,dc=com") + + authenticateUserAndCheckSearchIsClosed("cn=user1,ou=PowerUsers,dc=mycorp,dc=com") } - override def afterAll(): Unit = { - super.afterAll() + test("AuthenticateWhenUserSearchFails") { + conf.set(AUTHENTICATION_LDAP_USER_FILTER.key, "user1,user2") + intercept[AuthenticationException] { + when(search.findUserDn("user1")).thenReturn(null) + authenticateUserAndCheckSearchIsClosed("user1") + } } - test("ldap server is started") { - assert(ldapServer.getListenPort > 0) + test("AuthenticateWhenUserFilterFails") { + conf.set(AUTHENTICATION_LDAP_USER_FILTER.key, "user1,user2") + intercept[AuthenticationException] { + when(search.findUserDn("user3")).thenReturn("cn=user3,ou=PowerUsers,dc=mycorp,dc=com") + authenticateUserAndCheckSearchIsClosed("user3") + } } - test("authenticate tests") { - val providerImpl = new LdapAuthenticationProviderImpl(conf) - val e1 = intercept[AuthenticationException](providerImpl.authenticate("", "")) - assert(e1.getMessage.contains("user is null")) - val e2 = intercept[AuthenticationException](providerImpl.authenticate("kyuubi", "")) - assert(e2.getMessage.contains("password is null")) + test("AuthenticateWhenGroupMembershipKeyFilterPasses") { + conf.set(AUTHENTICATION_LDAP_GROUP_FILTER.key, "group1,group2") + + when(search.findUserDn("user1")).thenReturn("cn=user1,ou=PowerUsers,dc=mycorp,dc=com") + when(search.findUserDn("user2")).thenReturn("cn=user2,ou=PowerUsers,dc=mycorp,dc=com") - val user = "uid=kentyao,ou=users" - providerImpl.authenticate(user, "kentyao") - val e3 = intercept[AuthenticationException]( - providerImpl.authenticate(user, "kent")) - assert(e3.getMessage.contains(user)) - assert(e3.getCause.isInstanceOf[javax.naming.AuthenticationException]) + when(search.findGroupsForUser("cn=user1,ou=PowerUsers,dc=mycorp,dc=com")) + .thenReturn(Array( + "cn=testGroup,ou=Groups,dc=mycorp,dc=com", + "cn=group1,ou=Groups,dc=mycorp,dc=com")) + when(search.findGroupsForUser("cn=user2,ou=PowerUsers,dc=mycorp,dc=com")) + .thenReturn(Array( + "cn=testGroup,ou=Groups,dc=mycorp,dc=com", + "cn=group2,ou=Groups,dc=mycorp,dc=com")) + + authenticateUserAndCheckSearchIsClosed("user1") + authenticateUserAndCheckSearchIsClosed("user2") + } - conf.set(AUTHENTICATION_LDAP_BASEDN, ldapBaseDn) - val providerImpl2 = new LdapAuthenticationProviderImpl(conf) - providerImpl2.authenticate("kentyao", "kentyao") + test("AuthenticateWhenUserAndGroupMembershipKeyFiltersPass") { + conf.set(AUTHENTICATION_LDAP_GROUP_FILTER.key, "group1,group2") + conf.set(AUTHENTICATION_LDAP_USER_FILTER.key, "user1,user2") - val e4 = intercept[AuthenticationException]( - providerImpl.authenticate("kentyao", "kent")) - assert(e4.getMessage.contains(user)) + when(search.findUserDn("user1")).thenReturn("cn=user1,ou=PowerUsers,dc=mycorp,dc=com") + when(search.findUserDn("user2")).thenReturn("cn=user2,ou=PowerUsers,dc=mycorp,dc=com") - conf.unset(AUTHENTICATION_LDAP_URL) - val providerImpl3 = new LdapAuthenticationProviderImpl(conf) - val e5 = intercept[AuthenticationException]( - providerImpl3.authenticate("kentyao", "kentyao")) + when(search.findGroupsForUser("cn=user1,ou=PowerUsers,dc=mycorp,dc=com")) + .thenReturn(Array( + "cn=testGroup,ou=Groups,dc=mycorp,dc=com", + "cn=group1,ou=Groups,dc=mycorp,dc=com")) + when(search.findGroupsForUser("cn=user2,ou=PowerUsers,dc=mycorp,dc=com")) + .thenReturn(Array( + "cn=testGroup,ou=Groups,dc=mycorp,dc=com", + "cn=group2,ou=Groups,dc=mycorp,dc=com")) + + authenticateUserAndCheckSearchIsClosed("user1") + authenticateUserAndCheckSearchIsClosed("user2") + } + + test("AuthenticateWhenUserFilterPassesAndGroupMembershipKeyFilterFails") { + conf.set(AUTHENTICATION_LDAP_GROUP_FILTER.key, "group1,group2") + conf.set(AUTHENTICATION_LDAP_USER_FILTER.key, "user1,user2") + intercept[AuthenticationException] { + when(search.findUserDn("user1")).thenReturn("cn=user1,ou=PowerUsers,dc=mycorp,dc=com") + when(search.findGroupsForUser("cn=user1,ou=PowerUsers,dc=mycorp,dc=com")) + .thenReturn(Array( + "cn=testGroup,ou=Groups,dc=mycorp,dc=com", + "cn=OtherGroup,ou=Groups,dc=mycorp,dc=com")) + authenticateUserAndCheckSearchIsClosed("user1") + } + } + + test("AuthenticateWhenUserFilterFailsAndGroupMembershipKeyFilterPasses") { + conf.set(AUTHENTICATION_LDAP_GROUP_FILTER.key, "group3") + conf.set(AUTHENTICATION_LDAP_USER_FILTER.key, "user1,user2") + intercept[AuthenticationException] { + when(search.findUserDn("user3")).thenReturn("cn=user3,ou=PowerUsers,dc=mycorp,dc=com") + lenient.when(search.findGroupsForUser("cn=user3,ou=PowerUsers,dc=mycorp,dc=com")) + .thenReturn(Array( + "cn=testGroup,ou=Groups,dc=mycorp,dc=com", + "cn=group3,ou=Groups,dc=mycorp,dc=com")) + authenticateUserAndCheckSearchIsClosed("user3") + } + } + + test("AuthenticateWhenCustomQueryFilterPasses") { + conf.set(AUTHENTICATION_LDAP_BASE_DN, "dc=mycorp,dc=com") + conf.set( + AUTHENTICATION_LDAP_CUSTOM_LDAP_QUERY, + "(&(objectClass=person)(|(memberOf=CN=Domain Admins,CN=Users,DC=apache,DC=org)" + + "(memberOf=CN=Administrators,CN=Builtin,DC=apache,DC=org)))") + + when(search.executeCustomQuery(anyString)) + .thenReturn(Array( + "cn=user1,ou=PowerUsers,dc=mycorp,dc=com", + "cn=user2,ou=PowerUsers,dc=mycorp,dc=com")) + + authenticateUserAndCheckSearchIsClosed("user1") + } + + test("AuthenticateWhenCustomQueryFilterFailsAndUserFilterPasses") { + conf.set(AUTHENTICATION_LDAP_BASE_DN, "dc=mycorp,dc=com") + conf.set( + AUTHENTICATION_LDAP_CUSTOM_LDAP_QUERY, + "(&(objectClass=person)(|(memberOf=CN=Domain Admins,CN=Users,DC=apache,DC=org)" + + "(memberOf=CN=Administrators,CN=Builtin,DC=apache,DC=org)))") + conf.set(AUTHENTICATION_LDAP_USER_FILTER.key, "user3") + intercept[AuthenticationException] { + lenient.when(search.findUserDn("user3")).thenReturn("cn=user3,ou=PowerUsers,dc=mycorp,dc=com") + when(search.executeCustomQuery(anyString)) + .thenReturn(Array( + "cn=user1,ou=PowerUsers,dc=mycorp,dc=com", + "cn=user2,ou=PowerUsers,dc=mycorp,dc=com")) + authenticateUserAndCheckSearchIsClosed("user3") + } + } - assert(e5.getMessage.contains(user)) - assert(e5.getCause.isInstanceOf[CommunicationException]) + test("AuthenticateWhenUserMembershipKeyFilterPasses") { + conf.set(AUTHENTICATION_LDAP_BASE_DN, "dc=mycorp,dc=com") + conf.set(AUTHENTICATION_LDAP_GROUP_FILTER.key, "HIVE-USERS") + conf.set(AUTHENTICATION_LDAP_USER_MEMBERSHIP_KEY, "memberOf") + + when(search.findUserDn("user1")).thenReturn("cn=user1,ou=PowerUsers,dc=mycorp,dc=com") + + val groupDn = "cn=HIVE-USERS,ou=Groups,dc=mycorp,dc=com" + when(search.findGroupDn("HIVE-USERS")).thenReturn(groupDn) + when(search.isUserMemberOfGroup("user1", groupDn)).thenReturn(true) + + auth = new LdapAuthenticationProviderImpl(conf, factory) + auth.authenticate("user1", "Blah") + + verify(factory, times(1)).getInstance(isA(classOf[KyuubiConf]), anyString, mockEq("Blah")) + verify(search, times(1)).findGroupDn(anyString) + verify(search, times(1)).isUserMemberOfGroup(anyString, anyString) + verify(search, atLeastOnce).close() + } + + test("AuthenticateWhenUserMembershipKeyFilterFails") { + conf.set(AUTHENTICATION_LDAP_BASE_DN, "dc=mycorp,dc=com") + conf.set(AUTHENTICATION_LDAP_GROUP_FILTER.key, "HIVE-USERS") + conf.set(AUTHENTICATION_LDAP_USER_MEMBERSHIP_KEY, "memberOf") + intercept[AuthenticationException] { + when(search.findUserDn("user1")).thenReturn("cn=user1,ou=PowerUsers,dc=mycorp,dc=com") + + val groupDn = "cn=HIVE-USERS,ou=Groups,dc=mycorp,dc=com" + when(search.findGroupDn("HIVE-USERS")).thenReturn(groupDn) + when(search.isUserMemberOfGroup("user1", groupDn)).thenReturn(false) + + auth = new LdapAuthenticationProviderImpl(conf, factory) + auth.authenticate("user1", "Blah") + } + } + + test("AuthenticateWhenUserMembershipKeyFilter2x2PatternsPasses") { + conf.set(AUTHENTICATION_LDAP_GROUP_FILTER.key, "HIVE-USERS1,HIVE-USERS2") + conf.set(AUTHENTICATION_LDAP_GROUP_DN_PATTERN, "cn=%s,ou=Groups,ou=branch1,dc=mycorp,dc=com") + conf.set(AUTHENTICATION_LDAP_USER_DN_PATTERN, "cn=%s,ou=Userss,ou=branch1,dc=mycorp,dc=com") + conf.set(AUTHENTICATION_LDAP_USER_MEMBERSHIP_KEY, "memberOf") + + when(search.findUserDn("user1")).thenReturn("cn=user1,ou=PowerUsers,dc=mycorp,dc=com") + + when(search.findGroupDn("HIVE-USERS1")) + .thenReturn("cn=HIVE-USERS1,ou=Groups,ou=branch1,dc=mycorp,dc=com") + when(search.findGroupDn("HIVE-USERS2")) + .thenReturn("cn=HIVE-USERS2,ou=Groups,ou=branch1,dc=mycorp,dc=com") + + when(search.isUserMemberOfGroup( + "user1", + "cn=HIVE-USERS1,ou=Groups,ou=branch1,dc=mycorp,dc=com")) + .thenThrow(classOf[NamingException]) + when(search.isUserMemberOfGroup( + "user1", + "cn=HIVE-USERS2,ou=Groups,ou=branch1,dc=mycorp,dc=com")) + .thenReturn(true) + + auth = new LdapAuthenticationProviderImpl(conf, factory) + auth.authenticate("user1", "Blah") + + verify(factory, times(1)).getInstance(isA(classOf[KyuubiConf]), anyString, mockEq("Blah")) + verify(search, times(2)).findGroupDn(anyString) + verify(search, times(2)).isUserMemberOfGroup(anyString, anyString) + verify(search, atLeastOnce).close() + } + + // Kyuubi does not implement it + // test("AuthenticateWithBindInCredentialFilePasses") + // test("testAuthenticateWithBindInMissingCredentialFilePasses") + + test("AuthenticateWithBindUserPasses") { + val bindUser = "cn=BindUser,ou=Users,ou=branch1,dc=mycorp,dc=com" + val bindPass = "Blah" + val authFullUser = "cn=user1,ou=Users,ou=branch1,dc=mycorp,dc=com" + val authUser = "user1" + val authPass = "Blah2" + conf.set(AUTHENTICATION_LDAP_BIND_USER, bindUser) + conf.set(AUTHENTICATION_LDAP_BIND_PASSWORD, bindPass) + + when(search.findUserDn(mockEq(authUser))).thenReturn(authFullUser) + + auth = new LdapAuthenticationProviderImpl(conf, factory) + auth.authenticate(authUser, authPass) + + verify(factory, times(1)).getInstance( + isA(classOf[KyuubiConf]), + mockEq(bindUser), + mockEq(bindPass)) + verify(factory, times(1)).getInstance( + isA(classOf[KyuubiConf]), + mockEq(authFullUser), + mockEq(authPass)) + verify(search, times(1)).findUserDn(mockEq(authUser)) + } + + test("AuthenticateWithBindUserFailsOnAuthentication") { + val bindUser = "cn=BindUser,ou=Users,ou=branch1,dc=mycorp,dc=com" + val bindPass = "Blah" + val authFullUser = "cn=user1,ou=Users,ou=branch1,dc=mycorp,dc=com" + val authUser = "user1" + val authPass = "Blah2" + conf.set(AUTHENTICATION_LDAP_BIND_USER, bindUser) + conf.set(AUTHENTICATION_LDAP_BIND_PASSWORD, bindPass) + + intercept[AuthenticationException] { + when( + factory.getInstance( + any(classOf[KyuubiConf]), + mockEq(authFullUser), + mockEq(authPass))).thenThrow(classOf[AuthenticationException]) + when(search.findUserDn(mockEq(authUser))).thenReturn(authFullUser) + + auth = new LdapAuthenticationProviderImpl(conf, factory) + auth.authenticate(authUser, authPass) + } + } + + test("AuthenticateWithBindUserFailsOnGettingDn") { + val bindUser = "cn=BindUser,ou=Users,ou=branch1,dc=mycorp,dc=com" + val bindPass = "Blah" + val authUser = "user1" + val authPass = "Blah2" + conf.set(AUTHENTICATION_LDAP_BIND_USER, bindUser) + conf.set(AUTHENTICATION_LDAP_BIND_PASSWORD, bindPass) + + intercept[AuthenticationException] { + when(search.findUserDn(mockEq(authUser))).thenThrow(classOf[NamingException]) + auth = new LdapAuthenticationProviderImpl(conf, factory) + auth.authenticate(authUser, authPass) + } + } + + test("AuthenticateWithBindUserFailsOnBinding") { + val bindUser = "cn=BindUser,ou=Users,ou=branch1,dc=mycorp,dc=com" + val bindPass = "Blah" + val authUser = "user1" + val authPass = "Blah2" + conf.set(AUTHENTICATION_LDAP_BIND_USER, bindUser) + conf.set(AUTHENTICATION_LDAP_BIND_PASSWORD, bindPass) + + intercept[AuthenticationException] { + when(factory.getInstance(any(classOf[KyuubiConf]), mockEq(bindUser), mockEq(bindPass))) + .thenThrow(classOf[AuthenticationException]) + + auth = new LdapAuthenticationProviderImpl(conf, factory) + auth.authenticate(authUser, authPass) + } + } - conf.set(AUTHENTICATION_LDAP_DOMAIN, "kyuubi.com") - val providerImpl4 = new LdapAuthenticationProviderImpl(conf) - intercept[AuthenticationException](providerImpl4.authenticate("kentyao", "kentyao")) + private def authenticateUserAndCheckSearchIsClosed(user: String): Unit = { + auth = new LdapAuthenticationProviderImpl(conf, factory) + try auth.authenticate(user, "password doesn't matter") + finally verify(search, atLeastOnce).close() } } diff --git a/kyuubi-common/src/test/scala/org/apache/kyuubi/service/authentication/WithLdapServer.scala b/kyuubi-common/src/test/scala/org/apache/kyuubi/service/authentication/WithLdapServer.scala index 0bb38684e0b..b31a06f209f 100644 --- a/kyuubi-common/src/test/scala/org/apache/kyuubi/service/authentication/WithLdapServer.scala +++ b/kyuubi-common/src/test/scala/org/apache/kyuubi/service/authentication/WithLdapServer.scala @@ -17,20 +17,36 @@ package org.apache.kyuubi.service.authentication +import scala.util.Random + import com.unboundid.ldap.listener.{InMemoryDirectoryServer, InMemoryDirectoryServerConfig} +import com.unboundid.ldif.LDIFReader import org.apache.kyuubi.{KyuubiFunSuite, Utils} trait WithLdapServer extends KyuubiFunSuite { protected var ldapServer: InMemoryDirectoryServer = _ - protected val ldapBaseDn = "ou=users" - protected val ldapUser = Utils.currentUser - protected val ldapUserPasswd = "ldapPassword" + protected val ldapBaseDn: Array[String] = Array("ou=users") + protected val ldapUser: String = Utils.currentUser + protected val ldapUserPasswd: String = Random.alphanumeric.take(16).mkString protected def ldapUrl = s"ldap://localhost:${ldapServer.getListenPort}" + /** + * Apply LDIF files + * @param resource the LDIF file under classpath + */ + def applyLDIF(resource: String): Unit = { + ldapServer.applyChangesFromLDIF( + new LDIFReader(Utils.getContextOrKyuubiClassLoader.getResource(resource).openStream())) + } + override def beforeAll(): Unit = { - val config = new InMemoryDirectoryServerConfig(ldapBaseDn) + val config = new InMemoryDirectoryServerConfig(ldapBaseDn: _*) + // disable the schema so that we can apply LDIF which contains Microsoft's Active Directory + // specific definitions. + // https://myshittycode.com/2017/03/28/ + config.setSchema(null) config.addAdditionalBindCredentials(s"uid=$ldapUser,ou=users", ldapUserPasswd) ldapServer = new InMemoryDirectoryServer(config) ldapServer.startListening() diff --git a/kyuubi-common/src/test/scala/org/apache/kyuubi/service/authentication/ldap/ChainFilterSuite.scala b/kyuubi-common/src/test/scala/org/apache/kyuubi/service/authentication/ldap/ChainFilterSuite.scala new file mode 100644 index 00000000000..d76611b6e11 --- /dev/null +++ b/kyuubi-common/src/test/scala/org/apache/kyuubi/service/authentication/ldap/ChainFilterSuite.scala @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.service.authentication.ldap + +import javax.security.sasl.AuthenticationException + +import org.mockito.ArgumentMatchers.{any, anyString} +import org.mockito.Mockito.{doThrow, times, verify, when} +import org.scalatestplus.mockito.MockitoSugar.mock + +import org.apache.kyuubi.KyuubiFunSuite +import org.apache.kyuubi.config.KyuubiConf + +class ChainFilterSuite extends KyuubiFunSuite { + private var conf: KyuubiConf = _ + private var filter1: Filter = _ + private var filter2: Filter = _ + private var filter3: Filter = _ + private var factory1: FilterFactory = _ + private var factory2: FilterFactory = _ + private var factory3: FilterFactory = _ + private var factory: FilterFactory = _ + private var search: DirSearch = _ + + override def beforeEach(): Unit = { + conf = new KyuubiConf() + filter1 = mock[Filter] + filter2 = mock[Filter] + filter3 = mock[Filter] + factory1 = mock[FilterFactory] + factory2 = mock[FilterFactory] + factory3 = mock[FilterFactory] + factory = new ChainFilterFactory(factory1, factory2, factory3) + search = mock[DirSearch] + super.beforeEach() + } + + test("FactoryAllNull") { + when(factory1.getInstance(any(classOf[KyuubiConf]))).thenReturn(None) + when(factory2.getInstance(any(classOf[KyuubiConf]))).thenReturn(None) + when(factory3.getInstance(any(classOf[KyuubiConf]))).thenReturn(None) + assert(factory.getInstance(conf).isEmpty) + } + + test("FactoryAllEmpty") { + val emptyFactory = new ChainFilterFactory() + assert(emptyFactory.getInstance(conf).isEmpty) + } + + test("Factory") { + when(factory1.getInstance(any(classOf[KyuubiConf]))).thenReturn(Some(filter1)) + when(factory2.getInstance(any(classOf[KyuubiConf]))).thenReturn(Some(filter2)) + when(factory3.getInstance(any(classOf[KyuubiConf]))).thenReturn(Some(filter3)) + val filter = factory.getInstance(conf).get + filter.apply(search, "User") + verify(filter1, times(1)).apply(search, "User") + verify(filter2, times(1)).apply(search, "User") + verify(filter3, times(1)).apply(search, "User") + } + + test("ApplyNegative") { + intercept[AuthenticationException] { + doThrow(classOf[AuthenticationException]) + .when(filter3) + .apply(any().asInstanceOf[DirSearch], anyString) + when(factory1.getInstance(any(classOf[KyuubiConf]))).thenReturn(Some(filter1)) + when(factory2.getInstance(any(classOf[KyuubiConf]))).thenReturn(None) + when(factory3.getInstance(any(classOf[KyuubiConf]))).thenReturn(Some(filter3)) + val filter = factory.getInstance(conf).get + filter.apply(search, "User") + } + } +} diff --git a/kyuubi-common/src/test/scala/org/apache/kyuubi/service/authentication/ldap/CustomQueryFilterSuite.scala b/kyuubi-common/src/test/scala/org/apache/kyuubi/service/authentication/ldap/CustomQueryFilterSuite.scala new file mode 100644 index 00000000000..5ece4c88cf6 --- /dev/null +++ b/kyuubi-common/src/test/scala/org/apache/kyuubi/service/authentication/ldap/CustomQueryFilterSuite.scala @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.service.authentication.ldap + +import javax.security.sasl.AuthenticationException + +import org.mockito.ArgumentMatchers.{eq => mockEq} +import org.mockito.Mockito.when +import org.scalatestplus.mockito.MockitoSugar.mock + +import org.apache.kyuubi.KyuubiFunSuite +import org.apache.kyuubi.config.KyuubiConf + +class CustomQueryFilterSuite extends KyuubiFunSuite { + private val USER2_DN: String = "uid=user2,ou=People,dc=example,dc=com" + private val USER1_DN: String = "uid=user1,ou=People,dc=example,dc=com" + private val CUSTOM_QUERY: String = "(&(objectClass=person)(|(uid=user1)(uid=user2)))" + + private val factory: FilterFactory = CustomQueryFilterFactory + private var conf: KyuubiConf = _ + private var search: DirSearch = _ + + override def beforeEach(): Unit = { + conf = new KyuubiConf() + search = mock[DirSearch] + super.beforeEach() + } + + test("Factory") { + conf.unset(KyuubiConf.AUTHENTICATION_LDAP_CUSTOM_LDAP_QUERY) + assert(factory.getInstance(conf).isEmpty) + conf.set(KyuubiConf.AUTHENTICATION_LDAP_CUSTOM_LDAP_QUERY, CUSTOM_QUERY) + assert(factory.getInstance(conf).isDefined) + } + + test("ApplyPositive") { + conf.set(KyuubiConf.AUTHENTICATION_LDAP_CUSTOM_LDAP_QUERY, CUSTOM_QUERY) + when(search.executeCustomQuery(mockEq(CUSTOM_QUERY))) + .thenReturn(Array(USER1_DN, USER2_DN)) + val filter: Filter = factory.getInstance(conf).get + filter.apply(search, "user1") + filter.apply(search, "user2") + } + + test("ApplyNegative") { + intercept[AuthenticationException] { + conf.set(KyuubiConf.AUTHENTICATION_LDAP_CUSTOM_LDAP_QUERY, CUSTOM_QUERY) + when(search.executeCustomQuery(mockEq(CUSTOM_QUERY))) + .thenReturn(Array(USER1_DN, USER2_DN)) + val filter: Filter = factory.getInstance(conf).get + filter.apply(search, "user3") + } + } +} diff --git a/kyuubi-common/src/test/scala/org/apache/kyuubi/service/authentication/ldap/GroupFilterSuite.scala b/kyuubi-common/src/test/scala/org/apache/kyuubi/service/authentication/ldap/GroupFilterSuite.scala new file mode 100644 index 00000000000..f1e3c3581e9 --- /dev/null +++ b/kyuubi-common/src/test/scala/org/apache/kyuubi/service/authentication/ldap/GroupFilterSuite.scala @@ -0,0 +1,161 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.service.authentication.ldap + +import javax.security.sasl.AuthenticationException + +import org.mockito.ArgumentMatchers.{eq => mockEq} +import org.mockito.Mockito.{lenient, when} +import org.scalatestplus.mockito.MockitoSugar.mock + +import org.apache.kyuubi.KyuubiFunSuite +import org.apache.kyuubi.config.KyuubiConf + +class GroupFilterSuite extends KyuubiFunSuite { + private val factory: FilterFactory = GroupFilterFactory + private var conf: KyuubiConf = _ + private var search: DirSearch = _ + + override def beforeEach(): Unit = { + conf = new KyuubiConf + search = mock[DirSearch] + super.beforeEach() + } + + test("GetInstanceWhenGroupFilterIsEmpty") { + conf.unset(KyuubiConf.AUTHENTICATION_LDAP_GROUP_FILTER) + assert(factory.getInstance(conf).isEmpty) + } + + test("GetInstanceOfGroupMembershipKeyFilter") { + conf.set(KyuubiConf.AUTHENTICATION_LDAP_GROUP_FILTER.key, "G1") + val instance: Filter = factory.getInstance(conf).get + assert(instance.isInstanceOf[GroupMembershipKeyFilter]) + } + + test("GetInstanceOfUserMembershipKeyFilter") { + conf.set(KyuubiConf.AUTHENTICATION_LDAP_GROUP_FILTER.key, "G1") + conf.set(KyuubiConf.AUTHENTICATION_LDAP_USER_MEMBERSHIP_KEY, "memberof") + val instance: Filter = factory.getInstance(conf).get + assert(instance.isInstanceOf[UserMembershipKeyFilter]) + } + + test("GroupMembershipKeyFilterApplyPositive") { + conf.set(KyuubiConf.AUTHENTICATION_LDAP_GROUP_FILTER.key, "HiveUsers") + when(search.findUserDn(mockEq("user1"))) + .thenReturn("cn=user1,ou=People,dc=example,dc=com") + when(search.findUserDn(mockEq("cn=user2,dc=example,dc=com"))) + .thenReturn("cn=user2,ou=People,dc=example,dc=com") + when(search.findUserDn(mockEq("user3@mydomain.com"))) + .thenReturn("cn=user3,ou=People,dc=example,dc=com") + when(search.findGroupsForUser(mockEq("cn=user1,ou=People,dc=example,dc=com"))) + .thenReturn(Array( + "cn=SuperUsers,ou=Groups,dc=example,dc=com", + "cn=Office1,ou=Groups,dc=example,dc=com", + "cn=HiveUsers,ou=Groups,dc=example,dc=com", + "cn=G1,ou=Groups,dc=example,dc=com")) + when(search.findGroupsForUser(mockEq("cn=user2,ou=People,dc=example,dc=com"))) + .thenReturn(Array("cn=HiveUsers,ou=Groups,dc=example,dc=com")) + when(search.findGroupsForUser(mockEq("cn=user3,ou=People,dc=example,dc=com"))) + .thenReturn(Array( + "cn=HiveUsers,ou=Groups,dc=example,dc=com", + "cn=G1,ou=Groups,dc=example,dc=com", + "cn=G2,ou=Groups,dc=example,dc=com")) + val filter: Filter = factory.getInstance(conf).get + filter.apply(search, "user1") + filter.apply(search, "cn=user2,dc=example,dc=com") + filter.apply(search, "user3@mydomain.com") + } + + test("GroupMembershipKeyCaseInsensitiveFilterApplyPositive") { + conf.set(KyuubiConf.AUTHENTICATION_LDAP_GROUP_FILTER.key, "hiveusers,g1") + when(search.findUserDn(mockEq("user1"))) + .thenReturn("cn=user1,ou=People,dc=example,dc=com") + when(search.findUserDn(mockEq("cn=user2,dc=example,dc=com"))) + .thenReturn("cn=user2,ou=People,dc=example,dc=com") + when(search.findUserDn(mockEq("user3@mydomain.com"))) + .thenReturn("cn=user3,ou=People,dc=example,dc=com") + when(search.findGroupsForUser(mockEq("cn=user1,ou=People,dc=example,dc=com"))) + .thenReturn(Array( + "cn=SuperUsers,ou=Groups,dc=example,dc=com", + "cn=Office1,ou=Groups,dc=example,dc=com", + "cn=HiveUsers,ou=Groups,dc=example,dc=com", + "cn=G1,ou=Groups,dc=example,dc=com")) + when(search.findGroupsForUser(mockEq("cn=user2,ou=People,dc=example,dc=com"))) + .thenReturn(Array("cn=HiveUsers,ou=Groups,dc=example,dc=com")) + when(search.findGroupsForUser(mockEq("cn=user3,ou=People,dc=example,dc=com"))) + .thenReturn(Array( + "cn=G1,ou=Groups,dc=example,dc=com", + "cn=G2,ou=Groups,dc=example,dc=com")) + val filter: Filter = factory.getInstance(conf).get + filter.apply(search, "user1") + filter.apply(search, "cn=user2,dc=example,dc=com") + filter.apply(search, "user3@mydomain.com") + } + + test("GroupMembershipKeyCaseInsensitiveFilterApplyNegative") { + intercept[AuthenticationException] { + conf.set(KyuubiConf.AUTHENTICATION_LDAP_GROUP_FILTER.key, "hiveusers,containsg1") + lenient.when(search.findGroupsForUser(mockEq("user1"))) + .thenReturn(Array("SuperUsers", "Office1", "G1", "G2")) + val filter: Filter = factory.getInstance(conf).get + filter.apply(search, "user1") + } + } + + test("GroupMembershipKeyFilterApplyNegative") { + intercept[AuthenticationException] { + conf.set(KyuubiConf.AUTHENTICATION_LDAP_GROUP_FILTER.key, "HiveUsers") + lenient.when(search.findGroupsForUser(mockEq("user1"))) + .thenReturn(Array("SuperUsers", "Office1", "G1", "G2")) + val filter: Filter = factory.getInstance(conf).get + filter.apply(search, "user1") + } + } + + test("UserMembershipKeyFilterApplyPositiveWithUserId") { + conf.set(KyuubiConf.AUTHENTICATION_LDAP_USER_MEMBERSHIP_KEY.key, "memberOf") + conf.set(KyuubiConf.AUTHENTICATION_LDAP_GROUP_FILTER.key, "Group1,Group2") + when(search.findGroupDn("Group1")).thenReturn("cn=Group1,dc=a,dc=b") + when(search.findGroupDn("Group2")).thenReturn("cn=Group2,dc=a,dc=b") + when(search.isUserMemberOfGroup("User1", "cn=Group2,dc=a,dc=b")).thenReturn(true) + val filter: Filter = factory.getInstance(conf).get + filter.apply(search, "User1") + } + + test("UserMembershipKeyFilterApplyPositiveWithUserDn") { + conf.set(KyuubiConf.AUTHENTICATION_LDAP_USER_MEMBERSHIP_KEY.key, "memberOf") + conf.set(KyuubiConf.AUTHENTICATION_LDAP_GROUP_FILTER.key, "Group1,Group2") + when(search.findGroupDn("Group1")).thenReturn("cn=Group1,dc=a,dc=b") + when(search.findGroupDn("Group2")).thenReturn("cn=Group2,dc=a,dc=b") + when(search.isUserMemberOfGroup("cn=User1,dc=a,dc=b", "cn=Group2,dc=a,dc=b")).thenReturn(true) + val filter: Filter = factory.getInstance(conf).get + filter.apply(search, "cn=User1,dc=a,dc=b") + } + + test("UserMembershipKeyFilterApplyNegative") { + intercept[AuthenticationException] { + conf.set(KyuubiConf.AUTHENTICATION_LDAP_USER_MEMBERSHIP_KEY.key, "memberOf") + conf.set(KyuubiConf.AUTHENTICATION_LDAP_GROUP_FILTER.key, "Group1,Group2") + when(search.findGroupDn("Group1")).thenReturn("cn=Group1,dc=a,dc=b") + when(search.findGroupDn("Group2")).thenReturn("cn=Group2,dc=a,dc=b") + val filter: Filter = factory.getInstance(conf).get + filter.apply(search, "User1") + } + } +} diff --git a/kyuubi-common/src/test/scala/org/apache/kyuubi/service/authentication/ldap/LdapAuthenticationTestCase.scala b/kyuubi-common/src/test/scala/org/apache/kyuubi/service/authentication/ldap/LdapAuthenticationTestCase.scala new file mode 100644 index 00000000000..e8b92ebc0ec --- /dev/null +++ b/kyuubi-common/src/test/scala/org/apache/kyuubi/service/authentication/ldap/LdapAuthenticationTestCase.scala @@ -0,0 +1,117 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.service.authentication.ldap + +import javax.security.sasl.AuthenticationException + +import scala.collection.mutable + +import org.scalatest.Assertions.{fail, intercept} + +import org.apache.kyuubi.config.{ConfigEntry, KyuubiConf} +import org.apache.kyuubi.service.authentication.LdapAuthenticationProviderImpl + +object LdapAuthenticationTestCase { + def builder: LdapAuthenticationTestCase.Builder = new LdapAuthenticationTestCase.Builder + + class Builder { + private val overrides: mutable.Map[ConfigEntry[_], String] = new mutable.HashMap + + var conf: KyuubiConf = _ + + def baseDN(baseDN: String): LdapAuthenticationTestCase.Builder = + setVarOnce(KyuubiConf.AUTHENTICATION_LDAP_BASE_DN, baseDN) + + def guidKey(guidKey: String): LdapAuthenticationTestCase.Builder = + setVarOnce(KyuubiConf.AUTHENTICATION_LDAP_GUID_KEY, guidKey) + + def userDNPatterns(userDNPatterns: String*): LdapAuthenticationTestCase.Builder = + setVarOnce(KyuubiConf.AUTHENTICATION_LDAP_USER_DN_PATTERN, userDNPatterns.mkString(":")) + + def userFilters(userFilters: String*): LdapAuthenticationTestCase.Builder = + setVarOnce(KyuubiConf.AUTHENTICATION_LDAP_USER_FILTER, userFilters.mkString(",")) + + def groupDNPatterns(groupDNPatterns: String*): LdapAuthenticationTestCase.Builder = + setVarOnce(KyuubiConf.AUTHENTICATION_LDAP_GROUP_DN_PATTERN, groupDNPatterns.mkString(":")) + + def groupFilters(groupFilters: String*): LdapAuthenticationTestCase.Builder = + setVarOnce(KyuubiConf.AUTHENTICATION_LDAP_GROUP_FILTER, groupFilters.mkString(",")) + + def groupClassKey(groupClassKey: String): LdapAuthenticationTestCase.Builder = + setVarOnce(KyuubiConf.AUTHENTICATION_LDAP_GROUP_CLASS_KEY, groupClassKey) + + def ldapUrl(ldapUrl: String): LdapAuthenticationTestCase.Builder = + setVarOnce(KyuubiConf.AUTHENTICATION_LDAP_URL, ldapUrl) + + def customQuery(customQuery: String): LdapAuthenticationTestCase.Builder = + setVarOnce(KyuubiConf.AUTHENTICATION_LDAP_CUSTOM_LDAP_QUERY, customQuery) + + def groupMembershipKey(groupMembershipKey: String): LdapAuthenticationTestCase.Builder = + setVarOnce(KyuubiConf.AUTHENTICATION_LDAP_GROUP_MEMBERSHIP_KEY, groupMembershipKey) + + def userMembershipKey(userMembershipKey: String): LdapAuthenticationTestCase.Builder = + setVarOnce(KyuubiConf.AUTHENTICATION_LDAP_USER_MEMBERSHIP_KEY, userMembershipKey) + + private def setVarOnce( + confVar: ConfigEntry[_], + value: String): LdapAuthenticationTestCase.Builder = { + require(!overrides.contains(confVar), s"Property $confVar has been set already") + overrides.put(confVar, value) + this + } + + def build: LdapAuthenticationTestCase = { + require(conf == null, "Test Case Builder should not be reused. Please create a new instance.") + conf = new KyuubiConf() + overrides.foreach { case (k, v) => conf.set(k.key, v) } + new LdapAuthenticationTestCase(this) + } + } +} + +final class LdapAuthenticationTestCase(builder: LdapAuthenticationTestCase.Builder) { + + private val ldapProvider = new LdapAuthenticationProviderImpl(builder.conf) + + def assertAuthenticatePasses(credentials: Credentials): Unit = + try { + ldapProvider.authenticate(credentials.user, credentials.password) + } catch { + case e: AuthenticationException => + throw new AssertionError( + s"Authentication failed for user '${credentials.user}' " + + s"with password '${credentials.password}'", + e) + } + + def assertAuthenticateFails(credentials: Credentials): Unit = { + assertAuthenticateFails(credentials.user, credentials.password) + } + + def assertAuthenticateFailsUsingWrongPassword(credentials: Credentials): Unit = { + assertAuthenticateFails(credentials.user, "not" + credentials.password) + } + + def assertAuthenticateFails(user: String, password: String): Unit = { + val e = intercept[AuthenticationException] { + ldapProvider.authenticate(user, password) + fail(s"Expected authentication to fail for $user") + } + assert(e != null) + } +} diff --git a/kyuubi-common/src/test/scala/org/apache/kyuubi/service/authentication/ldap/LdapSearchSuite.scala b/kyuubi-common/src/test/scala/org/apache/kyuubi/service/authentication/ldap/LdapSearchSuite.scala new file mode 100644 index 00000000000..3bf27127ba3 --- /dev/null +++ b/kyuubi-common/src/test/scala/org/apache/kyuubi/service/authentication/ldap/LdapSearchSuite.scala @@ -0,0 +1,298 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.service.authentication.ldap + +import javax.naming.{NamingEnumeration, NamingException} +import javax.naming.directory.{DirContext, SearchControls, SearchResult} + +import org.mockito.ArgumentMatchers.{any, anyString, contains, eq => mockEq} +import org.mockito.Mockito.{atLeastOnce, verify, when} +import org.scalatestplus.mockito.MockitoSugar.mock + +import org.apache.kyuubi.KyuubiFunSuite +import org.apache.kyuubi.config.KyuubiConf +import org.apache.kyuubi.service.authentication.ldap.LdapTestUtils._ + +class LdapSearchSuite extends KyuubiFunSuite { + private var conf: KyuubiConf = _ + private var ctx: DirContext = _ + private var search: LdapSearch = _ + + override protected def beforeEach(): Unit = { + conf = new KyuubiConf() + conf.set(KyuubiConf.AUTHENTICATION_LDAP_USER_MEMBERSHIP_KEY, "memberOf") + ctx = mock[DirContext] + super.beforeEach() + } + + test("close") { + search = new LdapSearch(conf, ctx) + search.close() + verify(ctx, atLeastOnce).close() + } + + test("FindUserDnWhenUserDnPositive") { + val searchResult: NamingEnumeration[SearchResult] = + mockNamingEnumeration("CN=User1,OU=org1,DC=foo,DC=bar") + when(ctx.search(anyString, anyString, any(classOf[SearchControls]))) + .thenReturn(searchResult) + .thenThrow(classOf[NamingException]) + search = new LdapSearch(conf, ctx) + val expected: String = "CN=User1,OU=org1,DC=foo,DC=bar" + val actual: String = search.findUserDn("CN=User1,OU=org1") + assert(expected === actual) + } + + test("FindUserDnWhenUserDnNegativeDuplicates") { + val searchResult: NamingEnumeration[SearchResult] = + mockNamingEnumeration("CN=User1,OU=org1,DC=foo,DC=bar", "CN=User1,OU=org2,DC=foo,DC=bar") + when(ctx.search(anyString, anyString, any(classOf[SearchControls]))) + .thenReturn(searchResult) + search = new LdapSearch(conf, ctx) + assert(search.findUserDn("CN=User1,DC=foo,DC=bar") === null) + } + + test("FindUserDnWhenUserDnNegativeNone") { + val searchResult: NamingEnumeration[SearchResult] = mockEmptyNamingEnumeration + when(ctx.search(anyString, anyString, any(classOf[SearchControls]))) + .thenReturn(searchResult) + search = new LdapSearch(conf, ctx) + assert(search.findUserDn("CN=User1,DC=foo,DC=bar") === null) + } + + test("FindUserDnWhenUserPatternFoundBySecondPattern") { + conf.set( + KyuubiConf.AUTHENTICATION_LDAP_USER_DN_PATTERN, + "CN=%s,OU=org1,DC=foo,DC=bar:CN=%s,OU=org2,DC=foo,DC=bar") + val emptyResult: NamingEnumeration[SearchResult] = mockEmptyNamingEnumeration + val validResult: NamingEnumeration[SearchResult] = + mockNamingEnumeration("CN=User1,OU=org2,DC=foo,DC=bar") + when(ctx.search(anyString, anyString, any(classOf[SearchControls]))) + .thenReturn(emptyResult) + .thenReturn(validResult) + search = new LdapSearch(conf, ctx) + val expected: String = "CN=User1,OU=org2,DC=foo,DC=bar" + val actual: String = search.findUserDn("User1") + assert(expected === actual) + verify(ctx).search( + mockEq("OU=org1,DC=foo,DC=bar"), + contains("CN=User1"), + any(classOf[SearchControls])) + verify(ctx).search( + mockEq("OU=org2,DC=foo,DC=bar"), + contains("CN=User1"), + any(classOf[SearchControls])) + } + + test("FindUserDnWhenUserPatternFoundByFirstPattern") { + conf.set( + KyuubiConf.AUTHENTICATION_LDAP_USER_DN_PATTERN, + "CN=%s,OU=org1,DC=foo,DC=bar:CN=%s,OU=org2,DC=foo,DC=bar") + val emptyResult: NamingEnumeration[SearchResult] = mockEmptyNamingEnumeration + val validResult: NamingEnumeration[SearchResult] = + mockNamingEnumeration("CN=User1,OU=org2,DC=foo,DC=bar") + when(ctx.search(anyString, anyString, any(classOf[SearchControls]))) + .thenReturn(validResult) + .thenReturn(emptyResult) + search = new LdapSearch(conf, ctx) + val expected: String = "CN=User1,OU=org2,DC=foo,DC=bar" + val actual: String = search.findUserDn("User1") + assert(expected === actual) + verify(ctx).search( + mockEq("OU=org1,DC=foo,DC=bar"), + contains("CN=User1"), + any(classOf[SearchControls])) + } + + test("FindUserDnWhenUserPatternFoundByUniqueIdentifier") { + conf.set( + KyuubiConf.AUTHENTICATION_LDAP_USER_DN_PATTERN, + "CN=%s,OU=org1,DC=foo,DC=bar") + val validResult: NamingEnumeration[SearchResult] = + mockNamingEnumeration("CN=User1,OU=org1,DC=foo,DC=bar") + when(ctx.search(anyString, anyString, any(classOf[SearchControls]))) + .thenReturn(null) + .thenReturn(validResult) + search = new LdapSearch(conf, ctx) + val expected: String = "CN=User1,OU=org1,DC=foo,DC=bar" + val actual: String = search.findUserDn("User1") + assert(expected === actual) + verify(ctx).search( + mockEq("OU=org1,DC=foo,DC=bar"), + contains("CN=User1"), + any(classOf[SearchControls])) + verify(ctx).search( + mockEq("OU=org1,DC=foo,DC=bar"), + contains("uid=User1"), + any(classOf[SearchControls])) + } + + test("FindUserDnWhenUserPatternFoundByUniqueIdentifierNegativeNone") { + conf.set( + KyuubiConf.AUTHENTICATION_LDAP_USER_DN_PATTERN, + "CN=%s,OU=org1,DC=foo,DC=bar") + when(ctx.search(anyString, anyString, any(classOf[SearchControls]))) + .thenReturn(null) + .thenReturn(null) + search = new LdapSearch(conf, ctx) + assert(search.findUserDn("User1") === null) + } + + test("FindUserDnWhenUserPatternFoundByUniqueIdentifierNegativeMany") { + conf.set( + KyuubiConf.AUTHENTICATION_LDAP_USER_DN_PATTERN, + "CN=%s,OU=org1,DC=foo,DC=bar") + val manyResult: NamingEnumeration[SearchResult] = + mockNamingEnumeration("CN=User1,OU=org1,DC=foo,DC=bar", "CN=User12,OU=org1,DC=foo,DC=bar") + when(ctx.search(anyString, anyString, any(classOf[SearchControls]))) + .thenReturn(null) + .thenReturn(manyResult) + search = new LdapSearch(conf, ctx) + assert(search.findUserDn("User1") === null) + } + + test("FindGroupsForUser") { + conf.set( + KyuubiConf.AUTHENTICATION_LDAP_GROUP_DN_PATTERN, + "CN=%s,OU=org1,DC=foo,DC=bar") + val groupsResult: NamingEnumeration[SearchResult] = + mockNamingEnumeration("CN=Group1,OU=org1,DC=foo,DC=bar") + when( + ctx.search( + mockEq("OU=org1,DC=foo,DC=bar"), + contains("User1"), + any(classOf[SearchControls]))).thenReturn(groupsResult) + search = new LdapSearch(conf, ctx) + val expected = Array("CN=Group1,OU=org1,DC=foo,DC=bar") + val actual = search.findGroupsForUser("CN=User1,OU=org1,DC=foo,DC=bar") + assert(expected === actual) + } + + test("ExecuteCustomQuery") { + conf.set(KyuubiConf.AUTHENTICATION_LDAP_BASE_DN, "dc=example,dc=com") + val customQueryResult: NamingEnumeration[SearchResult] = mockNamingEnumeration(Array( + mockSearchResult( + "uid=group1,ou=Groups,dc=example,dc=com", + mockAttributes("member", "uid=user1,ou=People,dc=example,dc=com")), + mockSearchResult( + "uid=group2,ou=Groups,dc=example,dc=com", + mockAttributes("member", "uid=user2,ou=People,dc=example,dc=com")))) + when( + ctx.search( + mockEq("dc=example,dc=com"), + anyString, + any(classOf[SearchControls]))) + .thenReturn(customQueryResult) + search = new LdapSearch(conf, ctx) + val expected = Array( + "uid=group1,ou=Groups,dc=example,dc=com", + "uid=user1,ou=People,dc=example,dc=com", + "uid=group2,ou=Groups,dc=example,dc=com", + "uid=user2,ou=People,dc=example,dc=com") + val actual = search.executeCustomQuery("(&(objectClass=groupOfNames)(|(cn=group1)(cn=group2)))") + assert(expected.sorted === actual.sorted) + } + + test("FindGroupDnPositive") { + conf.set( + KyuubiConf.AUTHENTICATION_LDAP_GROUP_DN_PATTERN, + "CN=%s,OU=org1,DC=foo,DC=bar") + val groupDn: String = "CN=Group1" + val result: NamingEnumeration[SearchResult] = mockNamingEnumeration(groupDn) + when(ctx.search(anyString, anyString, any(classOf[SearchControls]))) + .thenReturn(result) + search = new LdapSearch(conf, ctx) + val expected: String = groupDn + val actual: String = search.findGroupDn("grp1") + assert(expected === actual) + } + + test("FindGroupDNNoResults") { + intercept[NamingException] { + conf.set( + KyuubiConf.AUTHENTICATION_LDAP_GROUP_DN_PATTERN, + "CN=%s,OU=org1,DC=foo,DC=bar") + val result: NamingEnumeration[SearchResult] = mockEmptyNamingEnumeration + when(ctx.search(anyString, anyString, any(classOf[SearchControls]))) + .thenReturn(result) + search = new LdapSearch(conf, ctx) + search.findGroupDn("anyGroup") + } + } + + test("FindGroupDNTooManyResults") { + intercept[NamingException] { + conf.set( + KyuubiConf.AUTHENTICATION_LDAP_GROUP_DN_PATTERN, + "CN=%s,OU=org1,DC=foo,DC=bar") + val result: NamingEnumeration[SearchResult] = + LdapTestUtils.mockNamingEnumeration("Result1", "Result2", "Result3") + when(ctx.search(anyString, anyString, any(classOf[SearchControls]))) + .thenReturn(result) + search = new LdapSearch(conf, ctx) + search.findGroupDn("anyGroup") + } + + } + + test("FindGroupDNWhenExceptionInSearch") { + conf.set( + KyuubiConf.AUTHENTICATION_LDAP_GROUP_DN_PATTERN, + Array("CN=%s,OU=org1,DC=foo,DC=bar", "CN=%s,OU=org2,DC=foo,DC=bar").mkString(":")) + val result: NamingEnumeration[SearchResult] = LdapTestUtils.mockNamingEnumeration("CN=Group1") + when(ctx.search(anyString, anyString, any(classOf[SearchControls]))) + .thenReturn(result) + .thenThrow(classOf[NamingException]) + search = new LdapSearch(conf, ctx) + val expected: String = "CN=Group1" + val actual: String = search.findGroupDn("grp1") + assert(expected === actual) + } + + test("IsUserMemberOfGroupWhenUserId") { + conf.set( + KyuubiConf.AUTHENTICATION_LDAP_USER_DN_PATTERN, + "CN=%s,OU=org1,DC=foo,DC=bar") + val validResult: NamingEnumeration[SearchResult] = + LdapTestUtils.mockNamingEnumeration("CN=User1") + val emptyResult: NamingEnumeration[SearchResult] = LdapTestUtils.mockEmptyNamingEnumeration + when(ctx.search(anyString, contains("(uid=usr1)"), any(classOf[SearchControls]))) + .thenReturn(validResult) + when(ctx.search(anyString, contains("(uid=usr2)"), any(classOf[SearchControls]))) + .thenReturn(emptyResult) + search = new LdapSearch(conf, ctx) + assert(search.isUserMemberOfGroup("usr1", "grp1")) + assert(!search.isUserMemberOfGroup("usr2", "grp2")) + } + + test("IsUserMemberOfGroupWhenUserDn") { + conf.set( + KyuubiConf.AUTHENTICATION_LDAP_USER_DN_PATTERN, + "CN=%s,OU=org1,DC=foo,DC=bar") + val validResult: NamingEnumeration[SearchResult] = + LdapTestUtils.mockNamingEnumeration("CN=User1") + val emptyResult: NamingEnumeration[SearchResult] = LdapTestUtils.mockEmptyNamingEnumeration + when(ctx.search(anyString, contains("(uid=User1)"), any(classOf[SearchControls]))) + .thenReturn(validResult) + when(ctx.search(anyString, contains("(uid=User2)"), any(classOf[SearchControls]))) + .thenReturn(emptyResult) + search = new LdapSearch(conf, ctx) + assert(search.isUserMemberOfGroup("CN=User1,OU=org1,DC=foo,DC=bar", "grp1")) + assert(!search.isUserMemberOfGroup("CN=User2,OU=org1,DC=foo,DC=bar", "grp2")) + } +} diff --git a/kyuubi-common/src/test/scala/org/apache/kyuubi/service/authentication/ldap/LdapTestUtils.scala b/kyuubi-common/src/test/scala/org/apache/kyuubi/service/authentication/ldap/LdapTestUtils.scala new file mode 100644 index 00000000000..49340f2c493 --- /dev/null +++ b/kyuubi-common/src/test/scala/org/apache/kyuubi/service/authentication/ldap/LdapTestUtils.scala @@ -0,0 +1,116 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.service.authentication.ldap + +import javax.naming.{NamingEnumeration, NamingException} +import javax.naming.directory._ + +import org.mockito.Mockito.when +import org.mockito.stubbing.OngoingStubbing +import org.scalatestplus.mockito.MockitoSugar + +case class NameValues(name: String, values: String*) +case class Credentials(user: String, password: String) + +case class User(dn: String, id: String, password: String) { + + def credentialsWithDn: Credentials = Credentials(dn, password) + + def credentialsWithId: Credentials = Credentials(id, password) +} + +object User { + def useIdForPassword(dn: String, id: String): User = User(dn, id, id) +} + +object LdapTestUtils extends MockitoSugar { + @throws[NamingException] + def mockEmptyNamingEnumeration: NamingEnumeration[SearchResult] = + mockNamingEnumeration(new Array[SearchResult](0)) + + @throws[NamingException] + def mockNamingEnumeration(dns: String*): NamingEnumeration[SearchResult] = + mockNamingEnumeration(mockSearchResults(dns.toArray)) + + @throws[NamingException] + def mockNamingEnumeration(searchResults: Array[SearchResult]): NamingEnumeration[SearchResult] = { + val ne = mock[NamingEnumeration[SearchResult]] + mockHasMoreMethod(ne, searchResults.length) + if (searchResults.nonEmpty) { + val mockedResults = Array(searchResults: _*) + mockNextMethod(ne, mockedResults) + } + ne + } + + @throws[NamingException] + def mockHasMoreMethod(ne: NamingEnumeration[SearchResult], length: Int): Unit = { + var hasMoreStub: OngoingStubbing[Boolean] = when(ne.hasMore) + (0 until length).foreach(_ => hasMoreStub = hasMoreStub.thenReturn(true)) + hasMoreStub.thenReturn(false) + } + + @throws[NamingException] + def mockNextMethod( + ne: NamingEnumeration[SearchResult], + searchResults: Array[SearchResult]): Unit = { + var nextStub: OngoingStubbing[SearchResult] = when(ne.next) + searchResults.foreach { searchResult => + nextStub = nextStub.thenReturn(searchResult) + } + } + + def mockSearchResults(dns: Array[String]): Array[SearchResult] = { + dns.map(mockSearchResult(_, null)) + } + + def mockSearchResult(dn: String, attributes: Attributes): SearchResult = { + val searchResult = mock[SearchResult] + when(searchResult.getNameInNamespace).thenReturn(dn) + when(searchResult.getAttributes).thenReturn(attributes) + searchResult + } + + @throws[NamingException] + def mockEmptyAttributes(): Attributes = mockAttributes() + + @throws[NamingException] + def mockAttributes(name: String, value: String): Attributes = + mockAttributes(NameValues(name, value)) + + @throws[NamingException] + def mockAttributes(name1: String, value1: String, name2: String, value2: String): Attributes = + if (name1 == name2) { + mockAttributes(NameValues(name1, value1, value2)) + } else { + mockAttributes( + NameValues(name1, value1), + NameValues(name2, value2)) + } + + @throws[NamingException] + private def mockAttributes(namedValues: NameValues*): Attributes = { + val attributes = new BasicAttributes + namedValues.foreach { namedValue => + val attr = new BasicAttribute(namedValue.name) + namedValue.values.foreach(attr.add) + attributes.put(attr) + } + attributes + } +} diff --git a/kyuubi-common/src/test/scala/org/apache/kyuubi/service/authentication/ldap/LdapUtilsSuite.scala b/kyuubi-common/src/test/scala/org/apache/kyuubi/service/authentication/ldap/LdapUtilsSuite.scala new file mode 100644 index 00000000000..1ef371051e4 --- /dev/null +++ b/kyuubi-common/src/test/scala/org/apache/kyuubi/service/authentication/ldap/LdapUtilsSuite.scala @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.service.authentication.ldap + +import org.apache.kyuubi.KyuubiFunSuite +import org.apache.kyuubi.config.KyuubiConf + +class LdapUtilsSuite extends KyuubiFunSuite { + test("CreateCandidatePrincipalsForUserDn") { + val conf = new KyuubiConf() + val userDn = "cn=user1,ou=CORP,dc=mycompany,dc=com" + val expected = Array(userDn) + val actual = LdapUtils.createCandidatePrincipals(conf, userDn) + assert(actual === expected) + } + + test("CreateCandidatePrincipalsForUserWithDomain") { + val conf = new KyuubiConf() + val userWithDomain: String = "user1@mycompany.com" + val expected = Array(userWithDomain) + val actual = LdapUtils.createCandidatePrincipals(conf, userWithDomain) + assert(actual === expected) + } + + test("CreateCandidatePrincipalsLdapDomain") { + val conf = new KyuubiConf() + .set(KyuubiConf.AUTHENTICATION_LDAP_DOMAIN, "mycompany.com") + val expected = Array("user1@mycompany.com") + val actual = LdapUtils.createCandidatePrincipals(conf, "user1") + assert(actual === expected) + } + + test("CreateCandidatePrincipalsUserPatternsDefaultBaseDn") { + val conf = new KyuubiConf() + .set(KyuubiConf.AUTHENTICATION_LDAP_GUID_KEY, "sAMAccountName") + .set(KyuubiConf.AUTHENTICATION_LDAP_BASE_DN, "dc=mycompany,dc=com") + val expected = Array("sAMAccountName=user1,dc=mycompany,dc=com") + val actual = LdapUtils.createCandidatePrincipals(conf, "user1") + assert(actual === expected) + } + + test("CreateCandidatePrincipals") { + val conf = new KyuubiConf() + .set(KyuubiConf.AUTHENTICATION_LDAP_BASE_DN, "dc=mycompany,dc=com") + .set( + KyuubiConf.AUTHENTICATION_LDAP_USER_DN_PATTERN, + "cn=%s,ou=CORP1,dc=mycompany,dc=com:cn=%s,ou=CORP2,dc=mycompany,dc=com") + val expected = Array( + "cn=user1,ou=CORP1,dc=mycompany,dc=com", + "cn=user1,ou=CORP2,dc=mycompany,dc=com") + val actual = LdapUtils.createCandidatePrincipals(conf, "user1") + assert(actual.sorted === expected.sorted) + } + + test("ExtractFirstRdn") { + val dn = "cn=user1,ou=CORP1,dc=mycompany,dc=com" + val expected = "cn=user1" + val actual = LdapUtils.extractFirstRdn(dn) + assert(actual === expected) + } + + test("ExtractBaseDn") { + val dn: String = "cn=user1,ou=CORP1,dc=mycompany,dc=com" + val expected = "ou=CORP1,dc=mycompany,dc=com" + val actual = LdapUtils.extractBaseDn(dn) + assert(actual === expected) + } + + test("ExtractBaseDnNegative") { + val dn: String = "cn=user1" + assert(LdapUtils.extractBaseDn(dn) === null) + } +} diff --git a/kyuubi-common/src/test/scala/org/apache/kyuubi/service/authentication/ldap/QueryFactorySuite.scala b/kyuubi-common/src/test/scala/org/apache/kyuubi/service/authentication/ldap/QueryFactorySuite.scala new file mode 100644 index 00000000000..56800968000 --- /dev/null +++ b/kyuubi-common/src/test/scala/org/apache/kyuubi/service/authentication/ldap/QueryFactorySuite.scala @@ -0,0 +1,97 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.service.authentication.ldap + +import org.apache.kyuubi.KyuubiFunSuite +import org.apache.kyuubi.config.KyuubiConf + +class QueryFactorySuite extends KyuubiFunSuite { + private var conf: KyuubiConf = _ + private var queries: QueryFactory = _ + + override def beforeEach(): Unit = { + conf = new KyuubiConf() + conf.set(KyuubiConf.AUTHENTICATION_LDAP_GUID_KEY, "guid") + conf.set(KyuubiConf.AUTHENTICATION_LDAP_GROUP_CLASS_KEY, "superGroups") + conf.set(KyuubiConf.AUTHENTICATION_LDAP_GROUP_MEMBERSHIP_KEY, "member") + conf.set(KyuubiConf.AUTHENTICATION_LDAP_USER_MEMBERSHIP_KEY, "partOf") + queries = new QueryFactory(conf) + super.beforeEach() + } + + test("FindGroupDnById") { + val q = queries.findGroupDnById("unique_group_id") + val expected = "(&(objectClass=superGroups)(guid=unique_group_id))" + val actual = q.filter + assert(expected === actual) + } + + test("FindUserDnByRdn") { + val q = queries.findUserDnByRdn("cn=User1") + val expected = + "(&(|(objectClass=person)(objectClass=user)(objectClass=inetOrgPerson))(cn=User1))" + val actual = q.filter + assert(expected === actual) + } + + test("FindDnByPattern") { + val q = queries.findDnByPattern("cn=User1") + val expected = "(cn=User1)" + val actual = q.filter + assert(expected === actual) + } + + test("FindUserDnByName") { + val q = queries.findUserDnByName("unique_user_id") + val expected = + "(&(|(objectClass=person)(objectClass=user)(objectClass=inetOrgPerson))" + + "(|(uid=unique_user_id)(sAMAccountName=unique_user_id)))" + val actual = q.filter + assert(expected === actual) + } + + test("FindGroupsForUser") { + val q = queries.findGroupsForUser("user_name", "user_Dn") + val expected = "(&(objectClass=superGroups)(|(member=user_Dn)(member=user_name)))" + val actual = q.filter + assert(expected === actual) + } + + test("IsUserMemberOfGroup") { + val q = queries.isUserMemberOfGroup("unique_user", "cn=MyGroup,ou=Groups,dc=mycompany,dc=com") + val expected = + "(&(|(objectClass=person)(objectClass=user)(objectClass=inetOrgPerson))" + + "(partOf=cn=MyGroup,ou=Groups,dc=mycompany,dc=com)(guid=unique_user))" + val actual = q.filter + assert(expected === actual) + } + + test("IsUserMemberOfGroupWhenMisconfigured") { + intercept[IllegalArgumentException] { + val misconfiguredQueryFactory = new QueryFactory(new KyuubiConf()) + misconfiguredQueryFactory.isUserMemberOfGroup("user", "cn=MyGroup") + } + } + + test("FindGroupDNByID") { + val q = queries.findGroupDnById("unique_group_id") + val expected = "(&(objectClass=superGroups)(guid=unique_group_id))" + val actual = q.filter + assert(expected === actual) + } +} diff --git a/kyuubi-common/src/test/scala/org/apache/kyuubi/service/authentication/ldap/QuerySuite.scala b/kyuubi-common/src/test/scala/org/apache/kyuubi/service/authentication/ldap/QuerySuite.scala new file mode 100644 index 00000000000..ffe330cce8b --- /dev/null +++ b/kyuubi-common/src/test/scala/org/apache/kyuubi/service/authentication/ldap/QuerySuite.scala @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.service.authentication.ldap + +import org.apache.kyuubi.KyuubiFunSuite + +class QuerySuite extends KyuubiFunSuite { + + test("QueryBuilderFilter") { + val q = Query.builder + .filter("test = query") + .map("uid_attr", "uid") + .map("value", "Hello!") + .build + assert("test uid=Hello! query" === q.filter) + assert(0 === q.controls.getCountLimit) + } + + test("QueryBuilderLimit") { + val q = Query.builder + .filter(",") + .map("key1", "value1") + .map("key2", "value2") + .limit(8) + .build + assert("value1,value2" === q.filter) + assert(8 === q.controls.getCountLimit) + } + + test("QueryBuilderReturningAttributes") { + val q = Query.builder + .filter("(query)") + .returnAttribute("attr1") + .returnAttribute("attr2") + .build + assert("(query)" === q.filter) + assert(Array("attr1", "attr2") === q.controls.getReturningAttributes) + } +} diff --git a/kyuubi-common/src/test/scala/org/apache/kyuubi/service/authentication/ldap/SearchResultHandlerSuite.scala b/kyuubi-common/src/test/scala/org/apache/kyuubi/service/authentication/ldap/SearchResultHandlerSuite.scala new file mode 100644 index 00000000000..4e92f7f5feb --- /dev/null +++ b/kyuubi-common/src/test/scala/org/apache/kyuubi/service/authentication/ldap/SearchResultHandlerSuite.scala @@ -0,0 +1,203 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.service.authentication.ldap + +import java.util +import javax.naming.{NamingEnumeration, NamingException} +import javax.naming.directory.SearchResult + +import scala.collection.mutable.ArrayBuffer + +import org.mockito.Mockito.{atLeastOnce, doThrow, verify} + +import org.apache.kyuubi.KyuubiFunSuite +import org.apache.kyuubi.service.authentication.ldap.LdapTestUtils._ + +class SearchResultHandlerSuite extends KyuubiFunSuite { + private var handler: SearchResultHandler = _ + + test("handle") { + val resultCollection = new MockResultCollectionBuilder() + .addSearchResultWithDns("1") + .addSearchResultWithDns("2", "3") + .build + handler = new SearchResultHandler(resultCollection) + val expected: util.List[String] = util.Arrays.asList("1", "2") + val actual: util.List[String] = new util.ArrayList[String] + handler.handle { record => + actual.add(record.getNameInNamespace) + actual.size < 2 + } + assert(expected === actual) + assertAllNamingEnumerationsClosed(resultCollection) + } + + test("GetAllLdapNamesNoRecords") { + val resultCollection = new MockResultCollectionBuilder() + .addEmptySearchResult() + .build + handler = new SearchResultHandler(resultCollection) + val actual = handler.getAllLdapNames + assert(actual.isEmpty, "ResultSet size") + assertAllNamingEnumerationsClosed(resultCollection) + } + + test("GetAllLdapNamesWithExceptionInNamingEnumerationClose") { + val resultCollection = new MockResultCollectionBuilder() + .addSearchResultWithDns("1") + .addSearchResultWithDns("2") + .build + doThrow(classOf[NamingException]).when(resultCollection.iterator.next).close() + handler = new SearchResultHandler(resultCollection) + val actual = handler.getAllLdapNames + assert(actual.length === 2, "ResultSet size") + assertAllNamingEnumerationsClosed(resultCollection) + } + + test("GetAllLdapNames") { + val objectDn1: String = "cn=a1,dc=b,dc=c" + val objectDn2: String = "cn=a2,dc=b,dc=c" + val objectDn3: String = "cn=a3,dc=b,dc=c" + val resultCollection = new MockResultCollectionBuilder() + .addSearchResultWithDns(objectDn1) + .addSearchResultWithDns(objectDn2, objectDn3) + .build + handler = new SearchResultHandler(resultCollection) + val expected = Array(objectDn1, objectDn2, objectDn3) + val actual = handler.getAllLdapNames + assert(expected.sorted === actual.sorted) + assertAllNamingEnumerationsClosed(resultCollection) + } + + test("GetAllLdapNamesAndAttributes") { + val searchResult1 = mockSearchResult( + "cn=a1,dc=b,dc=c", + mockAttributes("attr1", "attr1value1")) + val searchResult2 = mockSearchResult( + "cn=a2,dc=b,dc=c", + mockAttributes("attr1", "attr1value2", "attr2", "attr2value1")) + val searchResult3 = mockSearchResult( + "cn=a3,dc=b,dc=c", + mockAttributes("attr1", "attr1value3", "attr1", "attr1value4")) + val searchResult4 = mockSearchResult( + "cn=a4,dc=b,dc=c", + mockEmptyAttributes()) + val resultCollection = new MockResultCollectionBuilder() + .addSearchResults(searchResult1) + .addSearchResults(searchResult2, searchResult3) + .addSearchResults(searchResult4) + .build + handler = new SearchResultHandler(resultCollection) + val expected = Array( + "cn=a1,dc=b,dc=c", + "attr1value1", + "cn=a2,dc=b,dc=c", + "attr1value2", + "attr2value1", + "cn=a3,dc=b,dc=c", + "attr1value3", + "attr1value4", + "cn=a4,dc=b,dc=c") + val actual = handler.getAllLdapNamesAndAttributes + assert(expected.sorted === actual.sorted) + assertAllNamingEnumerationsClosed(resultCollection) + } + + test("HasSingleResultNoRecords") { + val resultCollection = new MockResultCollectionBuilder() + .addEmptySearchResult() + .build + handler = new SearchResultHandler(resultCollection) + assert(!handler.hasSingleResult) + assertAllNamingEnumerationsClosed(resultCollection) + } + + test("HasSingleResult") { + val resultCollection = new MockResultCollectionBuilder() + .addSearchResultWithDns("1") + .build + handler = new SearchResultHandler(resultCollection) + assert(handler.hasSingleResult) + assertAllNamingEnumerationsClosed(resultCollection) + } + + test("HasSingleResultManyRecords") { + val resultCollection = new MockResultCollectionBuilder() + .addSearchResultWithDns("1") + .addSearchResultWithDns("2") + .build + handler = new SearchResultHandler(resultCollection) + assert(!handler.hasSingleResult) + assertAllNamingEnumerationsClosed(resultCollection) + } + + test("GetSingleLdapNameNoRecords") { + intercept[NamingException] { + val resultCollection = new MockResultCollectionBuilder() + .addEmptySearchResult() + .build + handler = new SearchResultHandler(resultCollection) + try handler.getSingleLdapName + finally { + assertAllNamingEnumerationsClosed(resultCollection) + } + } + } + + test("GetSingleLdapName") { + val objectDn: String = "cn=a,dc=b,dc=c" + val resultCollection = new MockResultCollectionBuilder() + .addEmptySearchResult() + .addSearchResultWithDns(objectDn) + .build + handler = new SearchResultHandler(resultCollection) + val expected: String = objectDn + val actual: String = handler.getSingleLdapName + assert(expected === actual) + assertAllNamingEnumerationsClosed(resultCollection) + } + + private def assertAllNamingEnumerationsClosed( + resultCollection: Array[NamingEnumeration[SearchResult]]): Unit = { + for (namingEnumeration <- resultCollection) { + verify(namingEnumeration, atLeastOnce).close() + } + } +} + +class MockResultCollectionBuilder { + + val results = new ArrayBuffer[NamingEnumeration[SearchResult]] + + def addSearchResultWithDns(dns: String*): MockResultCollectionBuilder = { + results += mockNamingEnumeration(dns: _*) + this + } + + def addSearchResults(dns: SearchResult*): MockResultCollectionBuilder = { + results += mockNamingEnumeration(dns.toArray) + this + } + + def addEmptySearchResult(): MockResultCollectionBuilder = { + addSearchResults() + this + } + + def build: Array[NamingEnumeration[SearchResult]] = results.toArray +} diff --git a/kyuubi-common/src/test/scala/org/apache/kyuubi/service/authentication/ldap/UserFilterSuite.scala b/kyuubi-common/src/test/scala/org/apache/kyuubi/service/authentication/ldap/UserFilterSuite.scala new file mode 100644 index 00000000000..4fc6cba49b8 --- /dev/null +++ b/kyuubi-common/src/test/scala/org/apache/kyuubi/service/authentication/ldap/UserFilterSuite.scala @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.service.authentication.ldap + +import javax.security.sasl.AuthenticationException + +import org.scalatestplus.mockito.MockitoSugar.mock + +import org.apache.kyuubi.KyuubiFunSuite +import org.apache.kyuubi.config.KyuubiConf + +class UserFilterSuite extends KyuubiFunSuite { + private val factory: FilterFactory = UserFilterFactory + private var conf: KyuubiConf = _ + private var search: DirSearch = _ + + override def beforeEach(): Unit = { + conf = new KyuubiConf() + search = mock[DirSearch] + super.beforeEach() + } + + test("Factory") { + conf.unset(KyuubiConf.AUTHENTICATION_LDAP_USER_FILTER) + assert(factory.getInstance(conf).isEmpty) + conf.set(KyuubiConf.AUTHENTICATION_LDAP_USER_FILTER.key, "User1") + assert(factory.getInstance(conf).isDefined) + } + + test("ApplyPositive") { + conf.set(KyuubiConf.AUTHENTICATION_LDAP_USER_FILTER.key, "User1,User2,uSeR3") + val filter = factory.getInstance(conf).get + filter.apply(search, "User1") + filter.apply(search, "uid=user2,ou=People,dc=example,dc=com") + filter.apply(search, "User3@mydomain.com") + } + + test("ApplyNegative") { + intercept[AuthenticationException] { + conf.set(KyuubiConf.AUTHENTICATION_LDAP_USER_FILTER.key, "User1,User2") + val filter = factory.getInstance(conf).get + filter.apply(search, "User3") + } + } +} diff --git a/kyuubi-common/src/test/scala/org/apache/kyuubi/service/authentication/ldap/UserSearchFilterSuite.scala b/kyuubi-common/src/test/scala/org/apache/kyuubi/service/authentication/ldap/UserSearchFilterSuite.scala new file mode 100644 index 00000000000..1a711a6d9c9 --- /dev/null +++ b/kyuubi-common/src/test/scala/org/apache/kyuubi/service/authentication/ldap/UserSearchFilterSuite.scala @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.service.authentication.ldap + +import javax.naming.NamingException +import javax.security.sasl.AuthenticationException + +import org.mockito.ArgumentMatchers.anyString +import org.mockito.Mockito.when +import org.scalatestplus.mockito.MockitoSugar.mock + +import org.apache.kyuubi.KyuubiFunSuite +import org.apache.kyuubi.config.KyuubiConf + +class UserSearchFilterSuite extends KyuubiFunSuite { + private val factory: FilterFactory = UserSearchFilterFactory + private var conf: KyuubiConf = _ + private var search: DirSearch = _ + + override def beforeEach(): Unit = { + conf = new KyuubiConf() + search = mock[DirSearch] + super.beforeEach() + } + + test("FactoryWhenNoGroupOrUserFilters") { + assert(factory.getInstance(conf).isEmpty) + } + + test("FactoryWhenGroupFilter") { + conf.set(KyuubiConf.AUTHENTICATION_LDAP_GROUP_FILTER.key, "Grp1,Grp2") + assert(factory.getInstance(conf).isDefined) + } + + test("FactoryWhenUserFilter") { + conf.set(KyuubiConf.AUTHENTICATION_LDAP_USER_FILTER.key, "User1,User2") + assert(factory.getInstance(conf).isDefined) + } + + test("ApplyPositive") { + conf.set(KyuubiConf.AUTHENTICATION_LDAP_USER_FILTER.key, "User1") + val filter = factory.getInstance(conf).get + when(search.findUserDn(anyString)).thenReturn("cn=User1,ou=People,dc=example,dc=com") + filter.apply(search, "User1") + } + + test("ApplyWhenNamingException") { + intercept[AuthenticationException] { + conf.set(KyuubiConf.AUTHENTICATION_LDAP_USER_FILTER.key, "User1") + val filter = factory.getInstance(conf).get + when(search.findUserDn(anyString)).thenThrow(classOf[NamingException]) + filter.apply(search, "User3") + } + } + + test("ApplyWhenNotFound") { + intercept[AuthenticationException] { + conf.set(KyuubiConf.AUTHENTICATION_LDAP_USER_FILTER.key, "User1") + val filter = factory.getInstance(conf).get + when(search.findUserDn(anyString)).thenReturn(null) + filter.apply(search, "User3") + } + } +} diff --git a/kyuubi-common/src/test/scala/org/apache/kyuubi/util/SparkVersionUtil.scala b/kyuubi-common/src/test/scala/org/apache/kyuubi/util/SparkVersionUtil.scala index cd8409d10db..785015cc377 100644 --- a/kyuubi-common/src/test/scala/org/apache/kyuubi/util/SparkVersionUtil.scala +++ b/kyuubi-common/src/test/scala/org/apache/kyuubi/util/SparkVersionUtil.scala @@ -17,13 +17,22 @@ package org.apache.kyuubi.util -import org.apache.kyuubi.SPARK_COMPILE_VERSION import org.apache.kyuubi.engine.SemanticVersion +import org.apache.kyuubi.operation.HiveJDBCTestHelper -object SparkVersionUtil { - lazy val sparkSemanticVersion: SemanticVersion = SemanticVersion(SPARK_COMPILE_VERSION) +trait SparkVersionUtil { + this: HiveJDBCTestHelper => - def isSparkVersionAtLeast(ver: String): Boolean = { - sparkSemanticVersion.isVersionAtLeast(ver) + protected lazy val SPARK_ENGINE_RUNTIME_VERSION = sparkEngineMajorMinorVersion + + def sparkEngineMajorMinorVersion: SemanticVersion = { + var sparkRuntimeVer = "" + withJdbcStatement() { stmt => + val result = stmt.executeQuery("SELECT version()") + assert(result.next()) + sparkRuntimeVer = result.getString(1) + assert(!result.next()) + } + SemanticVersion(sparkRuntimeVer) } } diff --git a/kyuubi-ctl/pom.xml b/kyuubi-ctl/pom.xml index aa1e8f2e476..eb4060ffdd5 100644 --- a/kyuubi-ctl/pom.xml +++ b/kyuubi-ctl/pom.xml @@ -21,7 +21,7 @@ org.apache.kyuubi kyuubi-parent - 1.7.0-SNAPSHOT + 1.8.0-SNAPSHOT ../pom.xml diff --git a/kyuubi-ctl/src/main/scala/org/apache/kyuubi/ctl/CtlConf.scala b/kyuubi-ctl/src/main/scala/org/apache/kyuubi/ctl/CtlConf.scala index 08fbd7342c9..58b65582a22 100644 --- a/kyuubi-ctl/src/main/scala/org/apache/kyuubi/ctl/CtlConf.scala +++ b/kyuubi-ctl/src/main/scala/org/apache/kyuubi/ctl/CtlConf.scala @@ -19,16 +19,15 @@ package org.apache.kyuubi.ctl import java.time.Duration -import org.apache.kyuubi.config.{ConfigBuilder, ConfigEntry, KyuubiConf, OptionalConfigEntry} +import org.apache.kyuubi.config.{ConfigEntry, OptionalConfigEntry} +import org.apache.kyuubi.config.KyuubiConf.buildConf object CtlConf { - private def buildConf(key: String): ConfigBuilder = KyuubiConf.buildConf(key) - val CTL_REST_CLIENT_BASE_URL: OptionalConfigEntry[String] = buildConf("kyuubi.ctl.rest.base.url") .doc("The REST API base URL, " + - "which contains the scheme (http:// or https://), host name, port number") + "which contains the scheme (http:// or https://), hostname, port number") .version("1.6.0") .stringConf .createOptional @@ -49,7 +48,7 @@ object CtlConf { val CTL_REST_CLIENT_CONNECT_TIMEOUT: ConfigEntry[Long] = buildConf("kyuubi.ctl.rest.connect.timeout") - .doc("The timeout[ms] for establishing the connection with the kyuubi server." + + .doc("The timeout[ms] for establishing the connection with the kyuubi server. " + "A timeout value of zero is interpreted as an infinite timeout.") .version("1.6.0") .timeConf @@ -58,7 +57,7 @@ object CtlConf { val CTL_REST_CLIENT_SOCKET_TIMEOUT: ConfigEntry[Long] = buildConf("kyuubi.ctl.rest.socket.timeout") - .doc("The timeout[ms] for waiting for data packets after connection is established." + + .doc("The timeout[ms] for waiting for data packets after connection is established. " + "A timeout value of zero is interpreted as an infinite timeout.") .version("1.6.0") .timeConf diff --git a/kyuubi-ctl/src/main/scala/org/apache/kyuubi/ctl/cmd/create/CreateServerCommand.scala b/kyuubi-ctl/src/main/scala/org/apache/kyuubi/ctl/cmd/create/CreateServerCommand.scala index 66f75fc5f67..f4d4ce2ea9a 100644 --- a/kyuubi-ctl/src/main/scala/org/apache/kyuubi/ctl/cmd/create/CreateServerCommand.scala +++ b/kyuubi-ctl/src/main/scala/org/apache/kyuubi/ctl/cmd/create/CreateServerCommand.scala @@ -56,7 +56,7 @@ class CreateServerCommand(cliConfig: CliConfig) extends Command[Seq[ServiceNodeI withDiscoveryClient(kyuubiConf) { discoveryClient => val fromNamespace = DiscoveryPaths.makePath(null, kyuubiConf.get(HA_NAMESPACE)) - val toNamespace = CtlUtils.getZkNamespace(kyuubiConf, normalizedCliConfig) + val toNamespace = CtlUtils.getZkServerNamespace(kyuubiConf, normalizedCliConfig) val currentServerNodes = discoveryClient.getServiceNodesInfo(fromNamespace) val exposedServiceNodes = ListBuffer[ServiceNodeInfo]() diff --git a/kyuubi-ctl/src/main/scala/org/apache/kyuubi/ctl/cmd/delete/DeleteCommand.scala b/kyuubi-ctl/src/main/scala/org/apache/kyuubi/ctl/cmd/delete/DeleteCommand.scala index 69479259a6f..ddbe083ce2c 100644 --- a/kyuubi-ctl/src/main/scala/org/apache/kyuubi/ctl/cmd/delete/DeleteCommand.scala +++ b/kyuubi-ctl/src/main/scala/org/apache/kyuubi/ctl/cmd/delete/DeleteCommand.scala @@ -16,15 +16,13 @@ */ package org.apache.kyuubi.ctl.cmd.delete -import scala.collection.mutable.ListBuffer - import org.apache.kyuubi.ctl.cmd.Command import org.apache.kyuubi.ctl.opt.CliConfig -import org.apache.kyuubi.ctl.util.{CtlUtils, Render, Validator} -import org.apache.kyuubi.ha.client.DiscoveryClientProvider.withDiscoveryClient +import org.apache.kyuubi.ctl.util.{Render, Validator} import org.apache.kyuubi.ha.client.ServiceNodeInfo -class DeleteCommand(cliConfig: CliConfig) extends Command[Seq[ServiceNodeInfo]](cliConfig) { +abstract class DeleteCommand(cliConfig: CliConfig) + extends Command[Seq[ServiceNodeInfo]](cliConfig) { def validate(): Unit = { Validator.validateZkArguments(normalizedCliConfig) @@ -35,28 +33,7 @@ class DeleteCommand(cliConfig: CliConfig) extends Command[Seq[ServiceNodeInfo]]( /** * Delete zookeeper service node with specified host port. */ - def doRun(): Seq[ServiceNodeInfo] = { - withDiscoveryClient(conf) { discoveryClient => - val znodeRoot = CtlUtils.getZkNamespace(conf, normalizedCliConfig) - val hostPortOpt = - Some((normalizedCliConfig.zkOpts.host, normalizedCliConfig.zkOpts.port.toInt)) - val nodesToDelete = CtlUtils.getServiceNodes(discoveryClient, znodeRoot, hostPortOpt) - - val deletedNodes = ListBuffer[ServiceNodeInfo]() - nodesToDelete.foreach { node => - val nodePath = s"$znodeRoot/${node.nodeName}" - info(s"Deleting zookeeper service node:$nodePath") - try { - discoveryClient.delete(nodePath) - deletedNodes += node - } catch { - case e: Exception => - error(s"Failed to delete zookeeper service node:$nodePath", e) - } - } - deletedNodes - } - } + def doRun(): Seq[ServiceNodeInfo] def render(nodes: Seq[ServiceNodeInfo]): Unit = { val title = "Deleted zookeeper service nodes" diff --git a/kyuubi-ctl/src/main/scala/org/apache/kyuubi/ctl/cmd/delete/DeleteEngineCommand.scala b/kyuubi-ctl/src/main/scala/org/apache/kyuubi/ctl/cmd/delete/DeleteEngineCommand.scala index 7be60746785..ab6e81e2440 100644 --- a/kyuubi-ctl/src/main/scala/org/apache/kyuubi/ctl/cmd/delete/DeleteEngineCommand.scala +++ b/kyuubi-ctl/src/main/scala/org/apache/kyuubi/ctl/cmd/delete/DeleteEngineCommand.scala @@ -16,7 +16,12 @@ */ package org.apache.kyuubi.ctl.cmd.delete +import scala.collection.mutable.ListBuffer + import org.apache.kyuubi.ctl.opt.CliConfig +import org.apache.kyuubi.ctl.util.CtlUtils +import org.apache.kyuubi.ha.client.DiscoveryClientProvider.withDiscoveryClient +import org.apache.kyuubi.ha.client.ServiceNodeInfo class DeleteEngineCommand(cliConfig: CliConfig) extends DeleteCommand(cliConfig) { @@ -28,4 +33,29 @@ class DeleteEngineCommand(cliConfig: CliConfig) extends DeleteCommand(cliConfig) fail("Must specify user name for engine, please use -u or --user.") } } + + def doRun(): Seq[ServiceNodeInfo] = { + withDiscoveryClient(conf) { discoveryClient => + val hostPortOpt = + Some((cliConfig.zkOpts.host, cliConfig.zkOpts.port.toInt)) + val candidateNodes = CtlUtils.listZkEngineNodes(conf, normalizedCliConfig, hostPortOpt) + hostPortOpt.map { case (host, port) => + candidateNodes.filter { cn => cn.host == host && cn.port == port } + }.getOrElse(candidateNodes) + val deletedNodes = ListBuffer[ServiceNodeInfo]() + candidateNodes.foreach { node => + val engineNode = discoveryClient.getChildren(node.namespace)(0) + val nodePath = s"${node.namespace}/$engineNode" + info(s"Deleting zookeeper service node:$nodePath") + try { + discoveryClient.delete(nodePath) + deletedNodes += node + } catch { + case e: Exception => + error(s"Failed to delete zookeeper service node:$nodePath", e) + } + } + deletedNodes + } + } } diff --git a/kyuubi-ctl/src/main/scala/org/apache/kyuubi/ctl/cmd/delete/DeleteServerCommand.scala b/kyuubi-ctl/src/main/scala/org/apache/kyuubi/ctl/cmd/delete/DeleteServerCommand.scala index 6debba4d56f..197b786459a 100644 --- a/kyuubi-ctl/src/main/scala/org/apache/kyuubi/ctl/cmd/delete/DeleteServerCommand.scala +++ b/kyuubi-ctl/src/main/scala/org/apache/kyuubi/ctl/cmd/delete/DeleteServerCommand.scala @@ -16,6 +16,34 @@ */ package org.apache.kyuubi.ctl.cmd.delete +import scala.collection.mutable.ListBuffer + import org.apache.kyuubi.ctl.opt.CliConfig +import org.apache.kyuubi.ctl.util.CtlUtils +import org.apache.kyuubi.ha.client.DiscoveryClientProvider.withDiscoveryClient +import org.apache.kyuubi.ha.client.ServiceNodeInfo + +class DeleteServerCommand(cliConfig: CliConfig) extends DeleteCommand(cliConfig) { + override def doRun(): Seq[ServiceNodeInfo] = { + withDiscoveryClient(conf) { discoveryClient => + val znodeRoot = CtlUtils.getZkServerNamespace(conf, normalizedCliConfig) + val hostPortOpt = + Some((normalizedCliConfig.zkOpts.host, normalizedCliConfig.zkOpts.port.toInt)) + val nodesToDelete = CtlUtils.getServiceNodes(discoveryClient, znodeRoot, hostPortOpt) -class DeleteServerCommand(cliConfig: CliConfig) extends DeleteCommand(cliConfig) {} + val deletedNodes = ListBuffer[ServiceNodeInfo]() + nodesToDelete.foreach { node => + val nodePath = s"$znodeRoot/${node.nodeName}" + info(s"Deleting zookeeper service node:$nodePath") + try { + discoveryClient.delete(nodePath) + deletedNodes += node + } catch { + case e: Exception => + error(s"Failed to delete zookeeper service node:$nodePath", e) + } + } + deletedNodes + } + } +} diff --git a/kyuubi-ctl/src/main/scala/org/apache/kyuubi/ctl/cmd/get/GetCommand.scala b/kyuubi-ctl/src/main/scala/org/apache/kyuubi/ctl/cmd/get/GetCommand.scala index d78f0b995bb..af8285105c8 100644 --- a/kyuubi-ctl/src/main/scala/org/apache/kyuubi/ctl/cmd/get/GetCommand.scala +++ b/kyuubi-ctl/src/main/scala/org/apache/kyuubi/ctl/cmd/get/GetCommand.scala @@ -18,10 +18,10 @@ package org.apache.kyuubi.ctl.cmd.get import org.apache.kyuubi.ctl.cmd.Command import org.apache.kyuubi.ctl.opt.CliConfig -import org.apache.kyuubi.ctl.util.{CtlUtils, Render, Validator} +import org.apache.kyuubi.ctl.util.{Render, Validator} import org.apache.kyuubi.ha.client.ServiceNodeInfo -class GetCommand(cliConfig: CliConfig) extends Command[Seq[ServiceNodeInfo]](cliConfig) { +abstract class GetCommand(cliConfig: CliConfig) extends Command[Seq[ServiceNodeInfo]](cliConfig) { def validate(): Unit = { Validator.validateZkArguments(normalizedCliConfig) @@ -29,9 +29,7 @@ class GetCommand(cliConfig: CliConfig) extends Command[Seq[ServiceNodeInfo]](cli mergeArgsIntoKyuubiConf() } - def doRun(): Seq[ServiceNodeInfo] = { - CtlUtils.listZkServerNodes(conf, normalizedCliConfig, filterHostPort = true) - } + def doRun(): Seq[ServiceNodeInfo] def render(nodes: Seq[ServiceNodeInfo]): Unit = { val title = "Zookeeper service nodes" diff --git a/kyuubi-ctl/src/main/scala/org/apache/kyuubi/ctl/cmd/get/GetEngineCommand.scala b/kyuubi-ctl/src/main/scala/org/apache/kyuubi/ctl/cmd/get/GetEngineCommand.scala index 4d9101625fb..13f4d00c8fa 100644 --- a/kyuubi-ctl/src/main/scala/org/apache/kyuubi/ctl/cmd/get/GetEngineCommand.scala +++ b/kyuubi-ctl/src/main/scala/org/apache/kyuubi/ctl/cmd/get/GetEngineCommand.scala @@ -17,6 +17,8 @@ package org.apache.kyuubi.ctl.cmd.get import org.apache.kyuubi.ctl.opt.CliConfig +import org.apache.kyuubi.ctl.util.CtlUtils +import org.apache.kyuubi.ha.client.ServiceNodeInfo class GetEngineCommand(cliConfig: CliConfig) extends GetCommand(cliConfig) { @@ -28,4 +30,12 @@ class GetEngineCommand(cliConfig: CliConfig) extends GetCommand(cliConfig) { fail("Must specify user name for engine, please use -u or --user.") } } + + override def doRun(): Seq[ServiceNodeInfo] = { + CtlUtils.listZkEngineNodes( + conf, + normalizedCliConfig, + Some((cliConfig.zkOpts.host, cliConfig.zkOpts.port.toInt))) + } + } diff --git a/kyuubi-ctl/src/main/scala/org/apache/kyuubi/ctl/cmd/get/GetServerCommand.scala b/kyuubi-ctl/src/main/scala/org/apache/kyuubi/ctl/cmd/get/GetServerCommand.scala index 71b8684532d..faa76b219c4 100644 --- a/kyuubi-ctl/src/main/scala/org/apache/kyuubi/ctl/cmd/get/GetServerCommand.scala +++ b/kyuubi-ctl/src/main/scala/org/apache/kyuubi/ctl/cmd/get/GetServerCommand.scala @@ -17,5 +17,14 @@ package org.apache.kyuubi.ctl.cmd.get import org.apache.kyuubi.ctl.opt.CliConfig +import org.apache.kyuubi.ctl.util.CtlUtils +import org.apache.kyuubi.ha.client.ServiceNodeInfo -class GetServerCommand(cliConfig: CliConfig) extends GetCommand(cliConfig) {} +class GetServerCommand(cliConfig: CliConfig) extends GetCommand(cliConfig) { + override def doRun(): Seq[ServiceNodeInfo] = { + CtlUtils.listZkServerNodes( + conf, + normalizedCliConfig, + Some((cliConfig.zkOpts.host, cliConfig.zkOpts.port.toInt))) + } +} diff --git a/kyuubi-ctl/src/main/scala/org/apache/kyuubi/ctl/cmd/list/ListCommand.scala b/kyuubi-ctl/src/main/scala/org/apache/kyuubi/ctl/cmd/list/ListCommand.scala index 0cfeb8e4ea0..e5a3a688216 100644 --- a/kyuubi-ctl/src/main/scala/org/apache/kyuubi/ctl/cmd/list/ListCommand.scala +++ b/kyuubi-ctl/src/main/scala/org/apache/kyuubi/ctl/cmd/list/ListCommand.scala @@ -18,19 +18,17 @@ package org.apache.kyuubi.ctl.cmd.list import org.apache.kyuubi.ctl.cmd.Command import org.apache.kyuubi.ctl.opt.CliConfig -import org.apache.kyuubi.ctl.util.{CtlUtils, Render, Validator} +import org.apache.kyuubi.ctl.util.{Render, Validator} import org.apache.kyuubi.ha.client.ServiceNodeInfo -class ListCommand(cliConfig: CliConfig) extends Command[Seq[ServiceNodeInfo]](cliConfig) { +abstract class ListCommand(cliConfig: CliConfig) extends Command[Seq[ServiceNodeInfo]](cliConfig) { def validate(): Unit = { Validator.validateZkArguments(normalizedCliConfig) mergeArgsIntoKyuubiConf() } - def doRun(): Seq[ServiceNodeInfo] = { - CtlUtils.listZkServerNodes(conf, normalizedCliConfig, filterHostPort = false) - } + def doRun(): Seq[ServiceNodeInfo] def render(nodes: Seq[ServiceNodeInfo]): Unit = { val title = "Zookeeper service nodes" diff --git a/kyuubi-ctl/src/main/scala/org/apache/kyuubi/ctl/cmd/list/ListEngineCommand.scala b/kyuubi-ctl/src/main/scala/org/apache/kyuubi/ctl/cmd/list/ListEngineCommand.scala index 6a78a9e97c3..8a26b4cc973 100644 --- a/kyuubi-ctl/src/main/scala/org/apache/kyuubi/ctl/cmd/list/ListEngineCommand.scala +++ b/kyuubi-ctl/src/main/scala/org/apache/kyuubi/ctl/cmd/list/ListEngineCommand.scala @@ -17,6 +17,8 @@ package org.apache.kyuubi.ctl.cmd.list import org.apache.kyuubi.ctl.opt.CliConfig +import org.apache.kyuubi.ctl.util.CtlUtils +import org.apache.kyuubi.ha.client.ServiceNodeInfo class ListEngineCommand(cliConfig: CliConfig) extends ListCommand(cliConfig) { @@ -28,4 +30,7 @@ class ListEngineCommand(cliConfig: CliConfig) extends ListCommand(cliConfig) { fail("Must specify user name for engine, please use -u or --user.") } } + + override def doRun(): Seq[ServiceNodeInfo] = + CtlUtils.listZkEngineNodes(conf, normalizedCliConfig, None) } diff --git a/kyuubi-ctl/src/main/scala/org/apache/kyuubi/ctl/cmd/list/ListServerCommand.scala b/kyuubi-ctl/src/main/scala/org/apache/kyuubi/ctl/cmd/list/ListServerCommand.scala index 8c3219ecea6..56e8f4695cf 100644 --- a/kyuubi-ctl/src/main/scala/org/apache/kyuubi/ctl/cmd/list/ListServerCommand.scala +++ b/kyuubi-ctl/src/main/scala/org/apache/kyuubi/ctl/cmd/list/ListServerCommand.scala @@ -17,5 +17,11 @@ package org.apache.kyuubi.ctl.cmd.list import org.apache.kyuubi.ctl.opt.CliConfig +import org.apache.kyuubi.ctl.util.CtlUtils +import org.apache.kyuubi.ha.client.ServiceNodeInfo -class ListServerCommand(cliConfig: CliConfig) extends ListCommand(cliConfig) {} +class ListServerCommand(cliConfig: CliConfig) extends ListCommand(cliConfig) { + override def doRun(): Seq[ServiceNodeInfo] = { + CtlUtils.listZkServerNodes(conf, normalizedCliConfig, None) + } +} diff --git a/kyuubi-ctl/src/main/scala/org/apache/kyuubi/ctl/cmd/refresh/RefreshConfigCommand.scala b/kyuubi-ctl/src/main/scala/org/apache/kyuubi/ctl/cmd/refresh/RefreshConfigCommand.scala index b658c0e45e6..69aa0c3d0f1 100644 --- a/kyuubi-ctl/src/main/scala/org/apache/kyuubi/ctl/cmd/refresh/RefreshConfigCommand.scala +++ b/kyuubi-ctl/src/main/scala/org/apache/kyuubi/ctl/cmd/refresh/RefreshConfigCommand.scala @@ -21,7 +21,7 @@ import org.apache.kyuubi.KyuubiException import org.apache.kyuubi.client.AdminRestApi import org.apache.kyuubi.ctl.RestClientFactory.withKyuubiRestClient import org.apache.kyuubi.ctl.cmd.AdminCtlCommand -import org.apache.kyuubi.ctl.cmd.refresh.RefreshConfigCommandConfigType.{HADOOP_CONF, USER_DEFAULTS_CONF} +import org.apache.kyuubi.ctl.cmd.refresh.RefreshConfigCommandConfigType.{HADOOP_CONF, UNLIMITED_USERS, USER_DEFAULTS_CONF} import org.apache.kyuubi.ctl.opt.CliConfig import org.apache.kyuubi.ctl.util.{Tabulator, Validator} @@ -36,6 +36,7 @@ class RefreshConfigCommand(cliConfig: CliConfig) extends AdminCtlCommand[String] normalizedCliConfig.adminConfigOpts.configType match { case HADOOP_CONF => adminRestApi.refreshHadoopConf() case USER_DEFAULTS_CONF => adminRestApi.refreshUserDefaultsConf() + case UNLIMITED_USERS => adminRestApi.refreshUnlimitedUsers() case configType => throw new KyuubiException(s"Invalid config type:$configType") } } @@ -48,4 +49,5 @@ class RefreshConfigCommand(cliConfig: CliConfig) extends AdminCtlCommand[String] object RefreshConfigCommandConfigType { final val HADOOP_CONF = "hadoopConf" final val USER_DEFAULTS_CONF = "userDefaultsConf" + final val UNLIMITED_USERS = "unlimitedUsers" } diff --git a/kyuubi-ctl/src/main/scala/org/apache/kyuubi/ctl/opt/AdminCommandLine.scala b/kyuubi-ctl/src/main/scala/org/apache/kyuubi/ctl/opt/AdminCommandLine.scala index 59ad7f5fc4c..b1a70935b0d 100644 --- a/kyuubi-ctl/src/main/scala/org/apache/kyuubi/ctl/opt/AdminCommandLine.scala +++ b/kyuubi-ctl/src/main/scala/org/apache/kyuubi/ctl/opt/AdminCommandLine.scala @@ -102,6 +102,6 @@ object AdminCommandLine extends CommonCommandLine { .optional() .action((v, c) => c.copy(adminConfigOpts = c.adminConfigOpts.copy(configType = v))) .text("The valid config type can be one of the following: " + - s"$HADOOP_CONF, $USER_DEFAULTS_CONF.")) + s"$HADOOP_CONF, $USER_DEFAULTS_CONF, $UNLIMITED_USERS.")) } } diff --git a/kyuubi-ctl/src/main/scala/org/apache/kyuubi/ctl/util/CtlUtils.scala b/kyuubi-ctl/src/main/scala/org/apache/kyuubi/ctl/util/CtlUtils.scala index fdcc127f16a..8ce1d611a5a 100644 --- a/kyuubi-ctl/src/main/scala/org/apache/kyuubi/ctl/util/CtlUtils.scala +++ b/kyuubi-ctl/src/main/scala/org/apache/kyuubi/ctl/util/CtlUtils.scala @@ -25,48 +25,35 @@ import org.yaml.snakeyaml.Yaml import org.apache.kyuubi.KyuubiException import org.apache.kyuubi.config.KyuubiConf import org.apache.kyuubi.config.KyuubiConf.{ENGINE_SHARE_LEVEL, ENGINE_SHARE_LEVEL_SUBDOMAIN, ENGINE_TYPE} -import org.apache.kyuubi.ctl.opt.{CliConfig, ControlObject} +import org.apache.kyuubi.ctl.opt.CliConfig import org.apache.kyuubi.ha.client.{DiscoveryClient, DiscoveryPaths, ServiceNodeInfo} import org.apache.kyuubi.ha.client.DiscoveryClientProvider.withDiscoveryClient object CtlUtils { - private[ctl] def getZkNamespace(conf: KyuubiConf, cliConfig: CliConfig): String = { - cliConfig.resource match { - case ControlObject.SERVER => - DiscoveryPaths.makePath(null, cliConfig.zkOpts.namespace) - case ControlObject.ENGINE => - val engineType = Some(cliConfig.engineOpts.engineType) - .filter(_ != null).filter(_.nonEmpty) - .getOrElse(conf.get(ENGINE_TYPE)) - val engineSubdomain = Some(cliConfig.engineOpts.engineSubdomain) - .filter(_ != null).filter(_.nonEmpty) - .getOrElse(conf.get(ENGINE_SHARE_LEVEL_SUBDOMAIN).getOrElse("default")) - val engineShareLevel = Some(cliConfig.engineOpts.engineShareLevel) - .filter(_ != null).filter(_.nonEmpty) - .getOrElse(conf.get(ENGINE_SHARE_LEVEL)) - // The path of the engine defined in zookeeper comes from - // org.apache.kyuubi.engine.EngineRef#engineSpace - DiscoveryPaths.makePath( - s"${cliConfig.zkOpts.namespace}_" + - s"${cliConfig.zkOpts.version}_" + - s"${engineShareLevel}_${engineType}", - cliConfig.engineOpts.user, - engineSubdomain) - } + private[ctl] def getZkServerNamespace(conf: KyuubiConf, cliConfig: CliConfig): String = { + DiscoveryPaths.makePath(null, cliConfig.zkOpts.namespace) } - private[ctl] def getServiceNodes( - discoveryClient: DiscoveryClient, - znodeRoot: String, - hostPortOpt: Option[(String, Int)]): Seq[ServiceNodeInfo] = { - val serviceNodes = discoveryClient.getServiceNodesInfo(znodeRoot) - hostPortOpt match { - case Some((host, port)) => serviceNodes.filter { sn => - sn.host == host && sn.port == port - } - case _ => serviceNodes - } + private[ctl] def getZkEngineNamespaceAndSubdomain( + conf: KyuubiConf, + cliConfig: CliConfig): (String, Option[String]) = { + val engineType = Some(cliConfig.engineOpts.engineType) + .filter(_ != null).filter(_.nonEmpty) + .getOrElse(conf.get(ENGINE_TYPE)) + val engineShareLevel = Some(cliConfig.engineOpts.engineShareLevel) + .filter(_ != null).filter(_.nonEmpty) + .getOrElse(conf.get(ENGINE_SHARE_LEVEL)) + val engineSubdomain = Option(cliConfig.engineOpts.engineSubdomain) + .filter(_.nonEmpty).orElse(conf.get(ENGINE_SHARE_LEVEL_SUBDOMAIN)) + // The path of the engine defined in zookeeper comes from + // org.apache.kyuubi.engine.EngineRef#engineSpace + val rootPath = DiscoveryPaths.makePath( + s"${cliConfig.zkOpts.namespace}_" + + s"${cliConfig.zkOpts.version}_" + + s"${engineShareLevel}_${engineType}", + cliConfig.engineOpts.user) + (rootPath, engineSubdomain) } /** @@ -75,17 +62,41 @@ object CtlUtils { private[ctl] def listZkServerNodes( conf: KyuubiConf, cliConfig: CliConfig, - filterHostPort: Boolean): Seq[ServiceNodeInfo] = { - var nodes = Seq.empty[ServiceNodeInfo] + hostPortOpt: Option[(String, Int)]): Seq[ServiceNodeInfo] = { withDiscoveryClient(conf) { discoveryClient => - val znodeRoot = getZkNamespace(conf, cliConfig) - val hostPortOpt = - if (filterHostPort) { - Some((cliConfig.zkOpts.host, cliConfig.zkOpts.port.toInt)) - } else None - nodes = getServiceNodes(discoveryClient, znodeRoot, hostPortOpt) + val znodeRoot = getZkServerNamespace(conf, cliConfig) + getServiceNodes(discoveryClient, znodeRoot, hostPortOpt) } - nodes + } + + /** + * List Kyuubi engine nodes info. + */ + private[ctl] def listZkEngineNodes( + conf: KyuubiConf, + cliConfig: CliConfig, + hostPortOpt: Option[(String, Int)]): Seq[ServiceNodeInfo] = { + withDiscoveryClient(conf) { discoveryClient => + val (znodeRoot, subdomainOpt) = getZkEngineNamespaceAndSubdomain(conf, cliConfig) + val candidates = discoveryClient.getChildren(znodeRoot) + val matched = subdomainOpt match { + case Some(subdomain) => candidates.filter(_ == subdomain) + case None => candidates + } + matched.flatMap { subdomain => + getServiceNodes(discoveryClient, s"$znodeRoot/$subdomain", hostPortOpt) + } + } + } + + private[ctl] def getServiceNodes( + discoveryClient: DiscoveryClient, + znodeRoot: String, + hostPortOpt: Option[(String, Int)]): Seq[ServiceNodeInfo] = { + val serviceNodes = discoveryClient.getServiceNodesInfo(znodeRoot) + hostPortOpt.map { case (host, port) => + serviceNodes.filter { sn => sn.host == host && sn.port == port } + }.getOrElse(serviceNodes) } private[ctl] def loadYamlAsMap(cliConfig: CliConfig): JMap[String, Object] = { diff --git a/kyuubi-ctl/src/main/scala/org/apache/kyuubi/ctl/util/Render.scala b/kyuubi-ctl/src/main/scala/org/apache/kyuubi/ctl/util/Render.scala index aba6df35a4b..2d4879e42ad 100644 --- a/kyuubi-ctl/src/main/scala/org/apache/kyuubi/ctl/util/Render.scala +++ b/kyuubi-ctl/src/main/scala/org/apache/kyuubi/ctl/util/Render.scala @@ -111,6 +111,9 @@ private[ctl] object Render { private def buildBatchAppInfo(batch: Batch, showDiagnostic: Boolean = true): List[String] = { val batchAppInfo = ListBuffer[String]() + batch.getBatchInfo.asScala.foreach { case (key, value) => + batchAppInfo += s"$key: $value" + } if (batch.getAppStartTime > 0) { batchAppInfo += s"App Start Time:" + s" ${millisToDateString(batch.getAppStartTime, "yyyy-MM-dd HH:mm:ss")}" diff --git a/kyuubi-ctl/src/test/scala/org/apache/kyuubi/ctl/AdminControlCliArgumentsSuite.scala b/kyuubi-ctl/src/test/scala/org/apache/kyuubi/ctl/AdminControlCliArgumentsSuite.scala index afb946e9285..dab796127e3 100644 --- a/kyuubi-ctl/src/test/scala/org/apache/kyuubi/ctl/AdminControlCliArgumentsSuite.scala +++ b/kyuubi-ctl/src/test/scala/org/apache/kyuubi/ctl/AdminControlCliArgumentsSuite.scala @@ -63,7 +63,7 @@ class AdminControlCliArgumentsSuite extends KyuubiFunSuite with TestPrematureExi val opArgs = new AdminControlCliArguments(args) assert(opArgs.cliConfig.action === ControlAction.REFRESH) assert(opArgs.cliConfig.resource === ControlObject.CONFIG) - assert(opArgs.cliConfig.adminConfigOpts.configType === "hadoopConf") + assert(opArgs.cliConfig.adminConfigOpts.configType === HADOOP_CONF) args = Array( "refresh", @@ -72,7 +72,16 @@ class AdminControlCliArgumentsSuite extends KyuubiFunSuite with TestPrematureExi val opArgs2 = new AdminControlCliArguments(args) assert(opArgs2.cliConfig.action === ControlAction.REFRESH) assert(opArgs2.cliConfig.resource === ControlObject.CONFIG) - assert(opArgs2.cliConfig.adminConfigOpts.configType === "userDefaultsConf") + assert(opArgs2.cliConfig.adminConfigOpts.configType === USER_DEFAULTS_CONF) + + args = Array( + "refresh", + "config", + "unlimitedUsers") + val opArgs3 = new AdminControlCliArguments(args) + assert(opArgs3.cliConfig.action === ControlAction.REFRESH) + assert(opArgs3.cliConfig.resource === ControlObject.CONFIG) + assert(opArgs3.cliConfig.adminConfigOpts.configType === UNLIMITED_USERS) args = Array( "refresh", @@ -147,7 +156,7 @@ class AdminControlCliArgumentsSuite extends KyuubiFunSuite with TestPrematureExi | Refresh the resource. |Command: refresh config [] | Refresh the config with specified type. - | The valid config type can be one of the following: $HADOOP_CONF, $USER_DEFAULTS_CONF. + | The valid config type can be one of the following: $HADOOP_CONF, $USER_DEFAULTS_CONF, $UNLIMITED_USERS. | | -h, --help Show help message and exit.""".stripMargin // scalastyle:on diff --git a/kyuubi-ctl/src/test/scala/org/apache/kyuubi/ctl/ControlCliSuite.scala b/kyuubi-ctl/src/test/scala/org/apache/kyuubi/ctl/ControlCliSuite.scala index d27f3ec2a19..43a694a081a 100644 --- a/kyuubi-ctl/src/test/scala/org/apache/kyuubi/ctl/ControlCliSuite.scala +++ b/kyuubi-ctl/src/test/scala/org/apache/kyuubi/ctl/ControlCliSuite.scala @@ -199,20 +199,23 @@ class ControlCliSuite extends KyuubiFunSuite with TestPrematureExit { } } - test("test get zk namespace for different service type") { - val arg1 = Array( + test("test get zk server namespace") { + val args = Array( "list", "server", "--zk-quorum", zkServer.getConnectString, "--namespace", namespace) - val scArgs1 = new ControlCliArguments(arg1) - assert(CtlUtils.getZkNamespace( - scArgs1.command.conf, - scArgs1.command.normalizedCliConfig) == s"/$namespace") + val scArgs = new ControlCliArguments(args) + assert( + CtlUtils.getZkServerNamespace( + scArgs.command.conf, + scArgs.command.normalizedCliConfig) === s"/$namespace") + } - val arg2 = Array( + test("test get zk engine namespace") { + val args = Array( "list", "engine", "--zk-quorum", @@ -221,9 +224,11 @@ class ControlCliSuite extends KyuubiFunSuite with TestPrematureExit { namespace, "--user", user) - val scArgs2 = new ControlCliArguments(arg2) - assert(CtlUtils.getZkNamespace(scArgs2.command.conf, scArgs2.command.normalizedCliConfig) == - s"/${namespace}_${KYUUBI_VERSION}_USER_SPARK_SQL/$user/default") + val scArgs = new ControlCliArguments(args) + val expected = (s"/${namespace}_${KYUUBI_VERSION}_USER_SPARK_SQL/$user", None) + assert(CtlUtils.getZkEngineNamespaceAndSubdomain( + scArgs.command.conf, + scArgs.command.normalizedCliConfig) === expected) } test("test list zk service nodes info") { @@ -364,8 +369,10 @@ class ControlCliSuite extends KyuubiFunSuite with TestPrematureExit { "--user", user) val scArgs1 = new ControlCliArguments(arg1) - assert(CtlUtils.getZkNamespace(scArgs1.command.conf, scArgs1.command.normalizedCliConfig) == - s"/${namespace}_${KYUUBI_VERSION}_USER_SPARK_SQL/$user/default") + val expected1 = (s"/${namespace}_${KYUUBI_VERSION}_USER_SPARK_SQL/$user", None) + assert(CtlUtils.getZkEngineNamespaceAndSubdomain( + scArgs1.command.conf, + scArgs1.command.normalizedCliConfig) === expected1) val arg2 = Array( "list", @@ -379,8 +386,10 @@ class ControlCliSuite extends KyuubiFunSuite with TestPrematureExit { "--engine-type", "FLINK_SQL") val scArgs2 = new ControlCliArguments(arg2) - assert(CtlUtils.getZkNamespace(scArgs2.command.conf, scArgs2.command.normalizedCliConfig) == - s"/${namespace}_${KYUUBI_VERSION}_USER_FLINK_SQL/$user/default") + val expected2 = (s"/${namespace}_${KYUUBI_VERSION}_USER_FLINK_SQL/$user", None) + assert(CtlUtils.getZkEngineNamespaceAndSubdomain( + scArgs2.command.conf, + scArgs2.command.normalizedCliConfig) === expected2) val arg3 = Array( "list", @@ -394,8 +403,10 @@ class ControlCliSuite extends KyuubiFunSuite with TestPrematureExit { "--engine-type", "TRINO") val scArgs3 = new ControlCliArguments(arg3) - assert(CtlUtils.getZkNamespace(scArgs3.command.conf, scArgs3.command.normalizedCliConfig) == - s"/${namespace}_${KYUUBI_VERSION}_USER_TRINO/$user/default") + val expected3 = (s"/${namespace}_${KYUUBI_VERSION}_USER_TRINO/$user", None) + assert(CtlUtils.getZkEngineNamespaceAndSubdomain( + scArgs3.command.conf, + scArgs3.command.normalizedCliConfig) === expected3) val arg4 = Array( "list", @@ -411,8 +422,10 @@ class ControlCliSuite extends KyuubiFunSuite with TestPrematureExit { "--engine-subdomain", "sub_1") val scArgs4 = new ControlCliArguments(arg4) - assert(CtlUtils.getZkNamespace(scArgs4.command.conf, scArgs4.command.normalizedCliConfig) == - s"/${namespace}_${KYUUBI_VERSION}_USER_SPARK_SQL/$user/sub_1") + val expected4 = (s"/${namespace}_${KYUUBI_VERSION}_USER_SPARK_SQL/$user", Some("sub_1")) + assert(CtlUtils.getZkEngineNamespaceAndSubdomain( + scArgs4.command.conf, + scArgs4.command.normalizedCliConfig) === expected4) val arg5 = Array( "list", @@ -430,8 +443,10 @@ class ControlCliSuite extends KyuubiFunSuite with TestPrematureExit { "--engine-subdomain", "sub_1") val scArgs5 = new ControlCliArguments(arg5) - assert(CtlUtils.getZkNamespace(scArgs5.command.conf, scArgs5.command.normalizedCliConfig) == - s"/${namespace}_1.5.0_USER_SPARK_SQL/$user/sub_1") + val expected5 = (s"/${namespace}_1.5.0_USER_SPARK_SQL/$user", Some("sub_1")) + assert(CtlUtils.getZkEngineNamespaceAndSubdomain( + scArgs5.command.conf, + scArgs5.command.normalizedCliConfig) === expected5) } test("test get zk namespace for different share level engines") { @@ -445,8 +460,10 @@ class ControlCliSuite extends KyuubiFunSuite with TestPrematureExit { "--user", user) val scArgs1 = new ControlCliArguments(arg1) - assert(CtlUtils.getZkNamespace(scArgs1.command.conf, scArgs1.command.normalizedCliConfig) == - s"/${namespace}_${KYUUBI_VERSION}_USER_SPARK_SQL/$user/default") + val expected1 = (s"/${namespace}_${KYUUBI_VERSION}_USER_SPARK_SQL/$user", None) + assert(CtlUtils.getZkEngineNamespaceAndSubdomain( + scArgs1.command.conf, + scArgs1.command.normalizedCliConfig) === expected1) val arg2 = Array( "list", @@ -460,8 +477,10 @@ class ControlCliSuite extends KyuubiFunSuite with TestPrematureExit { "--engine-share-level", "CONNECTION") val scArgs2 = new ControlCliArguments(arg2) - assert(CtlUtils.getZkNamespace(scArgs2.command.conf, scArgs2.command.normalizedCliConfig) == - s"/${namespace}_${KYUUBI_VERSION}_CONNECTION_SPARK_SQL/$user/default") + val expected2 = (s"/${namespace}_${KYUUBI_VERSION}_CONNECTION_SPARK_SQL/$user", None) + assert(CtlUtils.getZkEngineNamespaceAndSubdomain( + scArgs2.command.conf, + scArgs2.command.normalizedCliConfig) === expected2) val arg3 = Array( "list", @@ -475,8 +494,10 @@ class ControlCliSuite extends KyuubiFunSuite with TestPrematureExit { "--engine-share-level", "USER") val scArgs3 = new ControlCliArguments(arg3) - assert(CtlUtils.getZkNamespace(scArgs3.command.conf, scArgs3.command.normalizedCliConfig) == - s"/${namespace}_${KYUUBI_VERSION}_USER_SPARK_SQL/$user/default") + val expected3 = (s"/${namespace}_${KYUUBI_VERSION}_USER_SPARK_SQL/$user", None) + assert(CtlUtils.getZkEngineNamespaceAndSubdomain( + scArgs3.command.conf, + scArgs3.command.normalizedCliConfig) === expected3) val arg4 = Array( "list", @@ -490,8 +511,10 @@ class ControlCliSuite extends KyuubiFunSuite with TestPrematureExit { "--engine-share-level", "GROUP") val scArgs4 = new ControlCliArguments(arg4) - assert(CtlUtils.getZkNamespace(scArgs4.command.conf, scArgs4.command.normalizedCliConfig) == - s"/${namespace}_${KYUUBI_VERSION}_GROUP_SPARK_SQL/$user/default") + val expected4 = (s"/${namespace}_${KYUUBI_VERSION}_GROUP_SPARK_SQL/$user", None) + assert(CtlUtils.getZkEngineNamespaceAndSubdomain( + scArgs4.command.conf, + scArgs4.command.normalizedCliConfig) === expected4) val arg5 = Array( "list", @@ -505,7 +528,9 @@ class ControlCliSuite extends KyuubiFunSuite with TestPrematureExit { "--engine-share-level", "SERVER") val scArgs5 = new ControlCliArguments(arg5) - assert(CtlUtils.getZkNamespace(scArgs5.command.conf, scArgs5.command.normalizedCliConfig) == - s"/${namespace}_${KYUUBI_VERSION}_SERVER_SPARK_SQL/$user/default") + val expected5 = (s"/${namespace}_${KYUUBI_VERSION}_SERVER_SPARK_SQL/$user", None) + assert(CtlUtils.getZkEngineNamespaceAndSubdomain( + scArgs5.command.conf, + scArgs5.command.normalizedCliConfig) === expected5) } } diff --git a/kyuubi-events/pom.xml b/kyuubi-events/pom.xml index a8030eb83d3..b97e9dffbb5 100644 --- a/kyuubi-events/pom.xml +++ b/kyuubi-events/pom.xml @@ -21,7 +21,7 @@ org.apache.kyuubi kyuubi-parent - 1.7.0-SNAPSHOT + 1.8.0-SNAPSHOT ../pom.xml diff --git a/kyuubi-ha/pom.xml b/kyuubi-ha/pom.xml index 8d7246effb4..b4605b6a187 100644 --- a/kyuubi-ha/pom.xml +++ b/kyuubi-ha/pom.xml @@ -21,7 +21,7 @@ org.apache.kyuubi kyuubi-parent - 1.7.0-SNAPSHOT + 1.8.0-SNAPSHOT ../pom.xml diff --git a/kyuubi-ha/src/main/scala/org/apache/kyuubi/ha/HighAvailabilityConf.scala b/kyuubi-ha/src/main/scala/org/apache/kyuubi/ha/HighAvailabilityConf.scala index d33dccf982f..148a21e4dd3 100644 --- a/kyuubi-ha/src/main/scala/org/apache/kyuubi/ha/HighAvailabilityConf.scala +++ b/kyuubi-ha/src/main/scala/org/apache/kyuubi/ha/HighAvailabilityConf.scala @@ -21,17 +21,16 @@ import java.time.Duration import org.apache.hadoop.security.UserGroupInformation -import org.apache.kyuubi.config.{ConfigBuilder, ConfigEntry, KyuubiConf, OptionalConfigEntry} +import org.apache.kyuubi.config.{ConfigEntry, KyuubiConf, OptionalConfigEntry} +import org.apache.kyuubi.config.KyuubiConf.buildConf import org.apache.kyuubi.ha.client.AuthTypes import org.apache.kyuubi.ha.client.RetryPolicies object HighAvailabilityConf { - private def buildConf(key: String): ConfigBuilder = KyuubiConf.buildConf(key) - @deprecated("using kyuubi.ha.addresses instead", "1.6.0") val HA_ZK_QUORUM: ConfigEntry[String] = buildConf("kyuubi.ha.zookeeper.quorum") - .doc("(deprecated) The connection string for the zookeeper ensemble") + .doc("(deprecated) The connection string for the ZooKeeper ensemble") .version("1.0.0") .stringConf .createWithDefault("") @@ -69,14 +68,14 @@ object HighAvailabilityConf { "1.3.2") val HA_ZK_ACL_ENABLED: ConfigEntry[Boolean] = buildConf("kyuubi.ha.zookeeper.acl.enabled") - .doc("Set to true if the zookeeper ensemble is kerberized") + .doc("Set to true if the ZooKeeper ensemble is kerberized") .version("1.0.0") .booleanConf .createWithDefault(UserGroupInformation.isSecurityEnabled) val HA_ZK_AUTH_TYPE: ConfigEntry[String] = buildConf("kyuubi.ha.zookeeper.auth.type") - .doc("The type of zookeeper authentication, all candidates are " + + .doc("The type of ZooKeeper authentication, all candidates are " + s"${AuthTypes.values.mkString("
              • ", "
              • ", "
              ")}") .version("1.3.2") .stringConf @@ -85,7 +84,7 @@ object HighAvailabilityConf { val HA_ZK_ENGINE_AUTH_TYPE: ConfigEntry[String] = buildConf("kyuubi.ha.zookeeper.engine.auth.type") - .doc("The type of zookeeper authentication for engine, all candidates are " + + .doc("The type of ZooKeeper authentication for the engine, all candidates are " + s"${AuthTypes.values.mkString("
              • ", "
              • ", "
              ")}") .version("1.3.2") .stringConf @@ -94,31 +93,31 @@ object HighAvailabilityConf { val HA_ZK_AUTH_PRINCIPAL: ConfigEntry[Option[String]] = buildConf("kyuubi.ha.zookeeper.auth.principal") - .doc("Name of the Kerberos principal is used for zookeeper authentication.") + .doc("Name of the Kerberos principal is used for ZooKeeper authentication.") .version("1.3.2") .fallbackConf(KyuubiConf.SERVER_PRINCIPAL) val HA_ZK_AUTH_KEYTAB: ConfigEntry[Option[String]] = buildConf("kyuubi.ha.zookeeper.auth.keytab") - .doc("Location of Kyuubi server's keytab is used for zookeeper authentication.") + .doc("Location of the Kyuubi server's keytab is used for ZooKeeper authentication.") .version("1.3.2") .fallbackConf(KyuubiConf.SERVER_KEYTAB) val HA_ZK_AUTH_DIGEST: OptionalConfigEntry[String] = buildConf("kyuubi.ha.zookeeper.auth.digest") - .doc("The digest auth string is used for zookeeper authentication, like: username:password.") + .doc("The digest auth string is used for ZooKeeper authentication, like: username:password.") .version("1.3.2") .stringConf .createOptional val HA_ZK_CONN_MAX_RETRIES: ConfigEntry[Int] = buildConf("kyuubi.ha.zookeeper.connection.max.retries") - .doc("Max retry times for connecting to the zookeeper ensemble") + .doc("Max retry times for connecting to the ZooKeeper ensemble") .version("1.0.0") .intConf .createWithDefault(3) val HA_ZK_CONN_BASE_RETRY_WAIT: ConfigEntry[Int] = buildConf("kyuubi.ha.zookeeper.connection.base.retry.wait") - .doc("Initial amount of time to wait between retries to the zookeeper ensemble") + .doc("Initial amount of time to wait between retries to the ZooKeeper ensemble") .version("1.0.0") .intConf .createWithDefault(1000) @@ -133,7 +132,7 @@ object HighAvailabilityConf { .createWithDefault(30 * 1000) val HA_ZK_CONN_TIMEOUT: ConfigEntry[Int] = buildConf("kyuubi.ha.zookeeper.connection.timeout") - .doc("The timeout(ms) of creating the connection to the zookeeper ensemble") + .doc("The timeout(ms) of creating the connection to the ZooKeeper ensemble") .version("1.0.0") .intConf .createWithDefault(15 * 1000) @@ -146,7 +145,7 @@ object HighAvailabilityConf { val HA_ZK_CONN_RETRY_POLICY: ConfigEntry[String] = buildConf("kyuubi.ha.zookeeper.connection.retry.policy") - .doc("The retry policy for connecting to the zookeeper ensemble, all candidates are:" + + .doc("The retry policy for connecting to the ZooKeeper ensemble, all candidates are:" + s" ${RetryPolicies.values.mkString("
              • ", "
              • ", "
              ")}") .version("1.0.0") .stringConf @@ -155,7 +154,7 @@ object HighAvailabilityConf { val HA_ZK_NODE_TIMEOUT: ConfigEntry[Long] = buildConf("kyuubi.ha.zookeeper.node.creation.timeout") - .doc("Timeout for creating zookeeper node") + .doc("Timeout for creating ZooKeeper node") .version("1.2.0") .timeConf .checkValue(_ > 0, "Must be positive") @@ -163,7 +162,7 @@ object HighAvailabilityConf { val HA_ENGINE_REF_ID: OptionalConfigEntry[String] = buildConf("kyuubi.ha.engine.ref.id") - .doc("The engine reference id will be attached to zookeeper node when engine started, " + + .doc("The engine reference id will be attached to ZooKeeper node when engine started, " + "and the kyuubi server will check it cyclically.") .internal .version("1.3.2") @@ -172,7 +171,7 @@ object HighAvailabilityConf { val HA_ZK_PUBLISH_CONFIGS: ConfigEntry[Boolean] = buildConf("kyuubi.ha.zookeeper.publish.configs") - .doc("When set to true, publish Kerberos configs to Zookeeper." + + .doc("When set to true, publish Kerberos configs to Zookeeper. " + "Note that the Hive driver needs to be greater than 1.3 or 2.0 or apply HIVE-11581 patch.") .version("1.4.0") .booleanConf @@ -189,8 +188,8 @@ object HighAvailabilityConf { val HA_ETCD_LEASE_TIMEOUT: ConfigEntry[Long] = buildConf("kyuubi.ha.etcd.lease.timeout") - .doc("Timeout for etcd keep alive lease. The kyuubi server will known " + - "unexpected loss of engine after up to this seconds.") + .doc("Timeout for etcd keep alive lease. The kyuubi server will know " + + "the unexpected loss of engine after up to this seconds.") .version("1.6.0") .timeConf .checkValue(_ > 0, "Must be positive") @@ -198,7 +197,7 @@ object HighAvailabilityConf { val HA_ETCD_SSL_ENABLED: ConfigEntry[Boolean] = buildConf("kyuubi.ha.etcd.ssl.enabled") - .doc("When set to true, will build a ssl secured etcd client.") + .doc("When set to true, will build an SSL secured etcd client.") .version("1.6.0") .booleanConf .createWithDefault(false) diff --git a/kyuubi-ha/src/main/scala/org/apache/kyuubi/ha/client/etcd/EtcdDiscoveryClient.scala b/kyuubi-ha/src/main/scala/org/apache/kyuubi/ha/client/etcd/EtcdDiscoveryClient.scala index ad3a0550c4a..80a70f2f218 100644 --- a/kyuubi-ha/src/main/scala/org/apache/kyuubi/ha/client/etcd/EtcdDiscoveryClient.scala +++ b/kyuubi-ha/src/main/scala/org/apache/kyuubi/ha/client/etcd/EtcdDiscoveryClient.scala @@ -90,7 +90,7 @@ class EtcdDiscoveryClient(conf: KyuubiConf) extends DiscoveryClient { } } - def createClient(): Unit = { + override def createClient(): Unit = { client = buildClient() kvClient = client.getKVClient() lockClient = client.getLockClient() @@ -99,13 +99,13 @@ class EtcdDiscoveryClient(conf: KyuubiConf) extends DiscoveryClient { leaseTTL = conf.get(HighAvailabilityConf.HA_ETCD_LEASE_TIMEOUT) / 1000 } - def closeClient(): Unit = { + override def closeClient(): Unit = { if (client != null) { client.close() } } - def create(path: String, mode: String, createParent: Boolean = true): String = { + override def create(path: String, mode: String, createParent: Boolean = true): String = { // createParent can not effect here mode match { case "PERSISTENT" => kvClient.put( @@ -116,7 +116,7 @@ class EtcdDiscoveryClient(conf: KyuubiConf) extends DiscoveryClient { path } - def getData(path: String): Array[Byte] = { + override def getData(path: String): Array[Byte] = { val response = kvClient.get(ByteSequence.from(path.getBytes())).get() if (response.getKvs.isEmpty) { throw new KyuubiException(s"Key[$path] not exists in ETCD, please check it.") @@ -125,12 +125,12 @@ class EtcdDiscoveryClient(conf: KyuubiConf) extends DiscoveryClient { } } - def setData(path: String, data: Array[Byte]): Boolean = { + override def setData(path: String, data: Array[Byte]): Boolean = { val response = kvClient.put(ByteSequence.from(path.getBytes), ByteSequence.from(data)).get() response != null } - def getChildren(path: String): List[String] = { + override def getChildren(path: String): List[String] = { val kvs = kvClient.get( ByteSequence.from(path.getBytes()), GetOption.newBuilder().isPrefix(true).build()).get().getKvs @@ -142,25 +142,25 @@ class EtcdDiscoveryClient(conf: KyuubiConf) extends DiscoveryClient { } } - def pathExists(path: String): Boolean = { + override def pathExists(path: String): Boolean = { !pathNonExists(path) } - def pathNonExists(path: String): Boolean = { + override def pathNonExists(path: String): Boolean = { kvClient.get(ByteSequence.from(path.getBytes())).get().getKvs.isEmpty } - def delete(path: String, deleteChildren: Boolean = false): Unit = { + override def delete(path: String, deleteChildren: Boolean = false): Unit = { kvClient.delete( ByteSequence.from(path.getBytes()), DeleteOption.newBuilder().isPrefix(deleteChildren).build()).get() } - def monitorState(serviceDiscovery: ServiceDiscovery): Unit = { + override def monitorState(serviceDiscovery: ServiceDiscovery): Unit = { // not need with etcd } - def tryWithLock[T]( + override def tryWithLock[T]( lockPath: String, timeout: Long)(f: => T): T = { // the default unit is millis, covert to seconds. @@ -195,7 +195,7 @@ class EtcdDiscoveryClient(conf: KyuubiConf) extends DiscoveryClient { } } - def getServerHost(namespace: String): Option[(String, Int)] = { + override def getServerHost(namespace: String): Option[(String, Int)] = { // TODO: use last one because to avoid touching some maybe-crashed engines // We need a big improvement here. getServiceNodesInfo(namespace, Some(1), silent = true) match { @@ -204,7 +204,7 @@ class EtcdDiscoveryClient(conf: KyuubiConf) extends DiscoveryClient { } } - def getEngineByRefId( + override def getEngineByRefId( namespace: String, engineRefId: String): Option[(String, Int)] = { getServiceNodesInfo(namespace, silent = true) @@ -212,7 +212,7 @@ class EtcdDiscoveryClient(conf: KyuubiConf) extends DiscoveryClient { .map(data => (data.host, data.port)) } - def getServiceNodesInfo( + override def getServiceNodesInfo( namespace: String, sizeOpt: Option[Int] = None, silent: Boolean = false): Seq[ServiceNodeInfo] = { @@ -241,7 +241,7 @@ class EtcdDiscoveryClient(conf: KyuubiConf) extends DiscoveryClient { } } - def registerService( + override def registerService( conf: KyuubiConf, namespace: String, serviceDiscovery: ServiceDiscovery, @@ -267,7 +267,7 @@ class EtcdDiscoveryClient(conf: KyuubiConf) extends DiscoveryClient { } } - def deregisterService(): Unit = { + override def deregisterService(): Unit = { // close the EPHEMERAL_SEQUENTIAL node in etcd if (serviceNode != null) { if (serviceNode.lease != LEASE_NULL_VALUE) { @@ -278,7 +278,7 @@ class EtcdDiscoveryClient(conf: KyuubiConf) extends DiscoveryClient { } } - def postDeregisterService(namespace: String): Boolean = { + override def postDeregisterService(namespace: String): Boolean = { if (namespace != null) { delete(DiscoveryPaths.makePath(null, namespace), true) true @@ -287,7 +287,7 @@ class EtcdDiscoveryClient(conf: KyuubiConf) extends DiscoveryClient { } } - def createAndGetServiceNode( + override def createAndGetServiceNode( conf: KyuubiConf, namespace: String, instance: String, @@ -297,7 +297,7 @@ class EtcdDiscoveryClient(conf: KyuubiConf) extends DiscoveryClient { } @VisibleForTesting - def startSecretNode( + override def startSecretNode( createMode: String, basePath: String, initData: String, @@ -307,7 +307,7 @@ class EtcdDiscoveryClient(conf: KyuubiConf) extends DiscoveryClient { ByteSequence.from(initData.getBytes())).get() } - def getAndIncrement(path: String, delta: Int = 1): Int = { + override def getAndIncrement(path: String, delta: Int = 1): Int = { val lockPath = s"${path}_tmp_for_lock" tryWithLock(lockPath, 60 * 1000) { if (pathNonExists(path)) { diff --git a/kyuubi-ha/src/main/scala/org/apache/kyuubi/ha/client/zookeeper/ZookeeperDiscoveryClient.scala b/kyuubi-ha/src/main/scala/org/apache/kyuubi/ha/client/zookeeper/ZookeeperDiscoveryClient.scala index 1315cf02957..daa27047eb9 100644 --- a/kyuubi-ha/src/main/scala/org/apache/kyuubi/ha/client/zookeeper/ZookeeperDiscoveryClient.scala +++ b/kyuubi-ha/src/main/scala/org/apache/kyuubi/ha/client/zookeeper/ZookeeperDiscoveryClient.scala @@ -66,17 +66,17 @@ class ZookeeperDiscoveryClient(conf: KyuubiConf) extends DiscoveryClient { @volatile private var serviceNode: PersistentNode = _ private var watcher: DeRegisterWatcher = _ - def createClient(): Unit = { + override def createClient(): Unit = { zkClient.start() } - def closeClient(): Unit = { + override def closeClient(): Unit = { if (zkClient != null) { zkClient.close() } } - def create(path: String, mode: String, createParent: Boolean = true): String = { + override def create(path: String, mode: String, createParent: Boolean = true): String = { val builder = if (createParent) zkClient.create().creatingParentsIfNeeded() else zkClient.create() builder @@ -84,27 +84,27 @@ class ZookeeperDiscoveryClient(conf: KyuubiConf) extends DiscoveryClient { .forPath(path) } - def getData(path: String): Array[Byte] = { + override def getData(path: String): Array[Byte] = { zkClient.getData.forPath(path) } - def setData(path: String, data: Array[Byte]): Boolean = { + override def setData(path: String, data: Array[Byte]): Boolean = { zkClient.setData().forPath(path, data) != null } - def getChildren(path: String): List[String] = { + override def getChildren(path: String): List[String] = { zkClient.getChildren.forPath(path).asScala.toList } - def pathExists(path: String): Boolean = { + override def pathExists(path: String): Boolean = { zkClient.checkExists().forPath(path) != null } - def pathNonExists(path: String): Boolean = { + override def pathNonExists(path: String): Boolean = { zkClient.checkExists().forPath(path) == null } - def delete(path: String, deleteChildren: Boolean = false): Unit = { + override def delete(path: String, deleteChildren: Boolean = false): Unit = { if (deleteChildren) { zkClient.delete().deletingChildrenIfNeeded().forPath(path) } else { @@ -112,7 +112,7 @@ class ZookeeperDiscoveryClient(conf: KyuubiConf) extends DiscoveryClient { } } - def monitorState(serviceDiscovery: ServiceDiscovery): Unit = { + override def monitorState(serviceDiscovery: ServiceDiscovery): Unit = { zkClient .getConnectionStateListenable.addListener(new ConnectionStateListener { private val isConnected = new AtomicBoolean(false) @@ -141,7 +141,7 @@ class ZookeeperDiscoveryClient(conf: KyuubiConf) extends DiscoveryClient { }) } - def tryWithLock[T](lockPath: String, timeout: Long)(f: => T): T = { + override def tryWithLock[T](lockPath: String, timeout: Long)(f: => T): T = { var lock: InterProcessSemaphoreMutex = null try { try { @@ -189,7 +189,7 @@ class ZookeeperDiscoveryClient(conf: KyuubiConf) extends DiscoveryClient { } } - def getServerHost(namespace: String): Option[(String, Int)] = { + override def getServerHost(namespace: String): Option[(String, Int)] = { // TODO: use last one because to avoid touching some maybe-crashed engines // We need a big improvement here. getServiceNodesInfo(namespace, Some(1), silent = true) match { @@ -198,7 +198,7 @@ class ZookeeperDiscoveryClient(conf: KyuubiConf) extends DiscoveryClient { } } - def getEngineByRefId( + override def getEngineByRefId( namespace: String, engineRefId: String): Option[(String, Int)] = { getServiceNodesInfo(namespace, silent = true) @@ -206,7 +206,7 @@ class ZookeeperDiscoveryClient(conf: KyuubiConf) extends DiscoveryClient { .map(data => (data.host, data.port)) } - def getServiceNodesInfo( + override def getServiceNodesInfo( namespace: String, sizeOpt: Option[Int] = None, silent: Boolean = false): Seq[ServiceNodeInfo] = { @@ -235,7 +235,7 @@ class ZookeeperDiscoveryClient(conf: KyuubiConf) extends DiscoveryClient { } } - def registerService( + override def registerService( conf: KyuubiConf, namespace: String, serviceDiscovery: ServiceDiscovery, @@ -254,7 +254,7 @@ class ZookeeperDiscoveryClient(conf: KyuubiConf) extends DiscoveryClient { watchNode() } - def deregisterService(): Unit = { + override def deregisterService(): Unit = { // close the EPHEMERAL_SEQUENTIAL node in zk if (serviceNode != null) { try { @@ -268,7 +268,7 @@ class ZookeeperDiscoveryClient(conf: KyuubiConf) extends DiscoveryClient { } } - def postDeregisterService(namespace: String): Boolean = { + override def postDeregisterService(namespace: String): Boolean = { if (namespace != null) { try { delete(namespace, true) @@ -283,7 +283,7 @@ class ZookeeperDiscoveryClient(conf: KyuubiConf) extends DiscoveryClient { } } - def createAndGetServiceNode( + override def createAndGetServiceNode( conf: KyuubiConf, namespace: String, instance: String, @@ -293,7 +293,7 @@ class ZookeeperDiscoveryClient(conf: KyuubiConf) extends DiscoveryClient { } @VisibleForTesting - def startSecretNode( + override def startSecretNode( createMode: String, basePath: String, initData: String, @@ -307,7 +307,7 @@ class ZookeeperDiscoveryClient(conf: KyuubiConf) extends DiscoveryClient { secretNode.start() } - def getAndIncrement(path: String, delta: Int = 1): Int = { + override def getAndIncrement(path: String, delta: Int = 1): Int = { val dai = new DistributedAtomicInteger(zkClient, path, new RetryForever(1000)) var atomicVal: AtomicValue[Integer] = null do { diff --git a/kyuubi-ha/src/test/scala/org/apache/kyuubi/ha/client/etcd/EtcdDiscoveryClientSuite.scala b/kyuubi-ha/src/test/scala/org/apache/kyuubi/ha/client/etcd/EtcdDiscoveryClientSuite.scala index 5b8855c1ee9..de48a3495db 100644 --- a/kyuubi-ha/src/test/scala/org/apache/kyuubi/ha/client/etcd/EtcdDiscoveryClientSuite.scala +++ b/kyuubi-ha/src/test/scala/org/apache/kyuubi/ha/client/etcd/EtcdDiscoveryClientSuite.scala @@ -22,6 +22,9 @@ import java.nio.charset.StandardCharsets import scala.collection.JavaConverters._ import io.etcd.jetcd.launcher.{Etcd, EtcdCluster} +import org.scalactic.source.Position +import org.scalatest.Tag +import org.testcontainers.DockerClientFactory import org.apache.kyuubi.config.KyuubiConf import org.apache.kyuubi.ha.HighAvailabilityConf.{HA_ADDRESSES, HA_CLIENT_CLASS} @@ -41,25 +44,38 @@ class EtcdDiscoveryClientSuite extends DiscoveryClientTests { var conf: KyuubiConf = KyuubiConf() .set(HA_CLIENT_CLASS, classOf[EtcdDiscoveryClient].getName) + private val hasDockerEnv = DockerClientFactory.instance().isDockerAvailable + override def beforeAll(): Unit = { - etcdCluster = new Etcd.Builder() - .withNodes(2) - .build() - etcdCluster.start() - conf = new KyuubiConf() - .set(HA_CLIENT_CLASS, classOf[EtcdDiscoveryClient].getName) - .set(HA_ADDRESSES, getConnectString) + if (hasDockerEnv) { + etcdCluster = new Etcd.Builder() + .withNodes(2) + .build() + etcdCluster.start() + conf = new KyuubiConf() + .set(HA_CLIENT_CLASS, classOf[EtcdDiscoveryClient].getName) + .set(HA_ADDRESSES, getConnectString) + } super.beforeAll() } override def afterAll(): Unit = { super.afterAll() - if (etcdCluster != null) { + if (hasDockerEnv && etcdCluster != null) { etcdCluster.close() etcdCluster = null } } + override protected def test( + testName: String, + testTags: Tag*)(testFun: => Any)(implicit pos: Position): Unit = { + if (hasDockerEnv) { + super.test(testName, testTags: _*)(testFun) + } + // skip test + } + test("etcd test: set, get and delete") { withDiscoveryClient(conf) { discoveryClient => val path = "/kyuubi" diff --git a/kyuubi-hive-beeline/README.md b/kyuubi-hive-beeline/README.md index ec4f86fd769..161acb99b64 100644 --- a/kyuubi-hive-beeline/README.md +++ b/kyuubi-hive-beeline/README.md @@ -3,3 +3,4 @@ Aiming to make a better supported beeline for Kyuubi - Support to show launch engine log when getting KyuubiConnection(Done, available since v1.4.0-incubating) + diff --git a/kyuubi-hive-beeline/pom.xml b/kyuubi-hive-beeline/pom.xml index 76753b38d60..beacba438c2 100644 --- a/kyuubi-hive-beeline/pom.xml +++ b/kyuubi-hive-beeline/pom.xml @@ -21,7 +21,7 @@ org.apache.kyuubi kyuubi-parent - 1.7.0-SNAPSHOT + 1.8.0-SNAPSHOT kyuubi-hive-beeline @@ -115,6 +115,12 @@ commons-io + + org.mockito + mockito-core + test + + commons-lang commons-lang diff --git a/kyuubi-hive-beeline/src/main/java/org/apache/hive/beeline/KyuubiBeeLine.java b/kyuubi-hive-beeline/src/main/java/org/apache/hive/beeline/KyuubiBeeLine.java index c88ceb5a86e..7ca7671486b 100644 --- a/kyuubi-hive-beeline/src/main/java/org/apache/hive/beeline/KyuubiBeeLine.java +++ b/kyuubi-hive-beeline/src/main/java/org/apache/hive/beeline/KyuubiBeeLine.java @@ -40,6 +40,7 @@ public KyuubiBeeLine() { this(true); } + @SuppressWarnings("deprecation") public KyuubiBeeLine(boolean isBeeLine) { super(isBeeLine); try { diff --git a/kyuubi-hive-beeline/src/main/java/org/apache/hive/beeline/KyuubiCommands.java b/kyuubi-hive-beeline/src/main/java/org/apache/hive/beeline/KyuubiCommands.java index 57241784761..311cb6a9538 100644 --- a/kyuubi-hive-beeline/src/main/java/org/apache/hive/beeline/KyuubiCommands.java +++ b/kyuubi-hive-beeline/src/main/java/org/apache/hive/beeline/KyuubiCommands.java @@ -19,6 +19,7 @@ import static org.apache.kyuubi.jdbc.hive.JdbcConnectionParams.*; +import com.google.common.annotations.VisibleForTesting; import java.io.*; import java.sql.*; import java.util.*; @@ -93,8 +94,9 @@ private boolean sourceFileInternal(File sourceFile) throws IOException { lines += "\n" + extra; } } - String[] cmds = lines.split(";"); + String[] cmds = lines.split(beeLine.getOpts().getDelimiter()); for (String c : cmds) { + c = c.trim(); if (!executeInternal(c, false)) { return false; } @@ -276,7 +278,8 @@ private boolean execute(String line, boolean call, boolean entireLineAsCommand) * quotations. It iterates through each character in the line and checks to see if it is a ;, ', * or " */ - private List getCmdList(String line, boolean entireLineAsCommand) { + @VisibleForTesting + public List getCmdList(String line, boolean entireLineAsCommand) { List cmdList = new ArrayList(); if (entireLineAsCommand) { cmdList.add(line); @@ -405,7 +408,7 @@ private String getProperty(Properties props, String[] keys) { } } - for (Iterator i = props.keySet().iterator(); i.hasNext(); ) { + for (Iterator i = props.keySet().iterator(); i.hasNext(); ) { String key = (String) i.next(); for (int j = 0; j < keys.length; j++) { if (key.endsWith(keys[j])) { @@ -470,9 +473,7 @@ public boolean connect(Properties props) throws IOException { props.setProperty(AUTH_USER, username); if (password == null) { password = - beeLine - .getConsoleReader() - .readLine("Enter password for " + urlForPrompt + ": ", new Character('*')); + beeLine.getConsoleReader().readLine("Enter password for " + urlForPrompt + ": ", '*'); } props.setProperty(AUTH_PASSWD, password); } @@ -487,6 +488,9 @@ public boolean connect(Properties props) throws IOException { beeLine.updateOptsForCli(); } beeLine.runInit(); + if (beeLine.getOpts().getInitFiles() != null) { + beeLine.initializeConsoleReader(null); + } beeLine.setCompletions(); beeLine.getOpts().setLastConnectedUrl(url); diff --git a/kyuubi-hive-beeline/src/test/java/org/apache/hive/beeline/KyuubiCommandsTest.java b/kyuubi-hive-beeline/src/test/java/org/apache/hive/beeline/KyuubiCommandsTest.java new file mode 100644 index 00000000000..ecb8d65f502 --- /dev/null +++ b/kyuubi-hive-beeline/src/test/java/org/apache/hive/beeline/KyuubiCommandsTest.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.beeline; + +import static org.junit.Assert.assertEquals; + +import java.io.IOException; +import java.util.List; +import jline.console.ConsoleReader; +import org.junit.Test; +import org.mockito.Mockito; + +public class KyuubiCommandsTest { + @Test + public void testParsePythonSnippets() throws IOException { + ConsoleReader reader = Mockito.mock(ConsoleReader.class); + String pythonSnippets = "for i in [1, 2, 3]:\n" + " print(i)\n"; + Mockito.when(reader.readLine()).thenReturn(pythonSnippets); + + KyuubiBeeLine beeline = new KyuubiBeeLine(); + beeline.setConsoleReader(reader); + KyuubiCommands commands = new KyuubiCommands(beeline); + String line = commands.handleMultiLineCmd(pythonSnippets); + + List cmdList = commands.getCmdList(line, false); + assertEquals(cmdList.size(), 1); + assertEquals(cmdList.get(0), pythonSnippets); + } +} diff --git a/kyuubi-hive-jdbc-shaded/pom.xml b/kyuubi-hive-jdbc-shaded/pom.xml index 0bfe88922da..1a6f258b02f 100644 --- a/kyuubi-hive-jdbc-shaded/pom.xml +++ b/kyuubi-hive-jdbc-shaded/pom.xml @@ -21,7 +21,7 @@ org.apache.kyuubi kyuubi-parent - 1.7.0-SNAPSHOT + 1.8.0-SNAPSHOT kyuubi-hive-jdbc-shaded diff --git a/kyuubi-hive-jdbc/README.md b/kyuubi-hive-jdbc/README.md index 3210e76ac56..10a0522dc38 100644 --- a/kyuubi-hive-jdbc/README.md +++ b/kyuubi-hive-jdbc/README.md @@ -1,9 +1,9 @@ # Kyuubi Hive JDBC Module - Aiming to make a better supported client for Kyuubi and Spark - Add catalog to getTables meta function for DataLakes (DONE, broken in v1.3.0-incubating, fixed in v1.3.1-incubating) - Deploy to maven central (DONE, available since v1.3.0-incubating) - Create shaded jar (DONE, available since v1.4.0-incubating) - Remove Hive dependencies (DONE, available since v1.6.0-incubating) + diff --git a/kyuubi-hive-jdbc/pom.xml b/kyuubi-hive-jdbc/pom.xml index 4d9648e75f6..36ea7acc274 100644 --- a/kyuubi-hive-jdbc/pom.xml +++ b/kyuubi-hive-jdbc/pom.xml @@ -21,7 +21,7 @@ org.apache.kyuubi kyuubi-parent - 1.7.0-SNAPSHOT + 1.8.0-SNAPSHOT kyuubi-hive-jdbc @@ -171,6 +171,14 @@ + + + + true + src/main/resources + + + org.apache.maven.plugins diff --git a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/JdbcColumnAttributes.java b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/JdbcColumnAttributes.java index 06fb398999a..b0257cfff09 100644 --- a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/JdbcColumnAttributes.java +++ b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/JdbcColumnAttributes.java @@ -20,7 +20,7 @@ public class JdbcColumnAttributes { public int precision = 0; public int scale = 0; - public String timeZone = ""; + public String timeZone = null; public JdbcColumnAttributes() {} diff --git a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/KyuubiArrowBasedResultSet.java b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/KyuubiArrowBasedResultSet.java index e45b6545471..ef5008503aa 100644 --- a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/KyuubiArrowBasedResultSet.java +++ b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/KyuubiArrowBasedResultSet.java @@ -34,6 +34,7 @@ import org.apache.kyuubi.jdbc.hive.arrow.ArrowUtils; /** Data independent base class which implements the common part of all Kyuubi result sets. */ +@SuppressWarnings("deprecation") public abstract class KyuubiArrowBasedResultSet implements SQLResultSet { protected Statement statement = null; @@ -49,6 +50,7 @@ public abstract class KyuubiArrowBasedResultSet implements SQLResultSet { protected Schema arrowSchema; protected VectorSchemaRoot root; protected ArrowColumnarBatchRow row; + protected boolean timestampAsString = true; protected BufferAllocator allocator; @@ -311,11 +313,18 @@ private Object getColumnValue(int columnIndex) throws SQLException { if (wasNull) { return null; } else { - return row.get(columnIndex - 1, columnType); + JdbcColumnAttributes attributes = columnAttributes.get(columnIndex - 1); + return row.get( + columnIndex - 1, + columnType, + attributes == null ? null : attributes.timeZone, + timestampAsString); } } catch (Exception e) { - e.printStackTrace(); - throw new KyuubiSQLException("Unrecognized column type:", e); + throw new KyuubiSQLException( + String.format( + "Error getting row of type %s at column index %d", columnType, columnIndex - 1), + e); } } diff --git a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/KyuubiArrowQueryResultSet.java b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/KyuubiArrowQueryResultSet.java index 1f2af29dc16..fda70f463e9 100644 --- a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/KyuubiArrowQueryResultSet.java +++ b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/KyuubiArrowQueryResultSet.java @@ -58,9 +58,6 @@ public class KyuubiArrowQueryResultSet extends KyuubiArrowBasedResultSet { private boolean isScrollable = false; private boolean fetchFirst = false; - // TODO:(fchen) make this configurable - protected boolean convertComplexTypeToString = true; - private final TProtocolVersion protocol; public static class Builder { @@ -87,6 +84,8 @@ public static class Builder { private boolean isScrollable = false; private ReentrantLock transportLock = null; + private boolean timestampAsString = true; + public Builder(Statement statement) throws SQLException { this.statement = statement; this.connection = statement.getConnection(); @@ -153,6 +152,11 @@ public Builder setScrollable(boolean setScrollable) { return this; } + public Builder setTimestampAsString(boolean timestampAsString) { + this.timestampAsString = timestampAsString; + return this; + } + public Builder setTransportLock(ReentrantLock transportLock) { this.transportLock = transportLock; return this; @@ -189,10 +193,10 @@ protected KyuubiArrowQueryResultSet(Builder builder) throws SQLException { this.maxRows = builder.maxRows; } this.isScrollable = builder.isScrollable; + this.timestampAsString = builder.timestampAsString; this.protocol = builder.getProtocolVersion(); arrowSchema = - ArrowUtils.toArrowSchema( - columnNames, convertComplexTypeToStringType(columnTypes), columnAttributes); + ArrowUtils.toArrowSchema(columnNames, convertToStringType(columnTypes), columnAttributes); if (allocator == null) { initArrowSchemaAndAllocator(); } @@ -271,8 +275,7 @@ private void retrieveSchema() throws SQLException { columnAttributes.add(getColumnAttributes(primitiveTypeEntry)); } arrowSchema = - ArrowUtils.toArrowSchema( - columnNames, convertComplexTypeToStringType(columnTypes), columnAttributes); + ArrowUtils.toArrowSchema(columnNames, convertToStringType(columnTypes), columnAttributes); } catch (SQLException eS) { throw eS; // rethrow the SQLException as is } catch (Exception ex) { @@ -480,22 +483,25 @@ public boolean isClosed() { return isClosed; } - private List convertComplexTypeToStringType(List colTypes) { - if (convertComplexTypeToString) { - return colTypes.stream() - .map( - type -> { - if (type == TTypeId.ARRAY_TYPE - || type == TTypeId.MAP_TYPE - || type == TTypeId.STRUCT_TYPE) { - return TTypeId.STRING_TYPE; - } else { - return type; - } - }) - .collect(Collectors.toList()); - } else { - return colTypes; - } + /** + * 1. the complex types (map/array/struct) are always converted to string type to transport 2. if + * the user set `timestampAsString = true`, then the timestamp type will be converted to string + * type too. + */ + private List convertToStringType(List colTypes) { + return colTypes.stream() + .map( + type -> { + if ((type == TTypeId.ARRAY_TYPE + || type == TTypeId.MAP_TYPE + || type == TTypeId.STRUCT_TYPE) // complex type (map/array/struct) + // timestamp type + || (type == TTypeId.TIMESTAMP_TYPE && timestampAsString)) { + return TTypeId.STRING_TYPE; + } else { + return type; + } + }) + .collect(Collectors.toList()); } } diff --git a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/KyuubiBaseResultSet.java b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/KyuubiBaseResultSet.java index 5fe889346e6..a9d32e8cafb 100644 --- a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/KyuubiBaseResultSet.java +++ b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/KyuubiBaseResultSet.java @@ -32,6 +32,7 @@ import org.apache.kyuubi.jdbc.hive.common.TimestampTZUtil; /** Data independent base class which implements the common part of all Kyuubi result sets. */ +@SuppressWarnings("deprecation") public abstract class KyuubiBaseResultSet implements SQLResultSet { protected Statement statement = null; diff --git a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/KyuubiConnection.java b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/KyuubiConnection.java index 1d7755b1ef9..f9935d23e19 100644 --- a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/KyuubiConnection.java +++ b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/KyuubiConnection.java @@ -30,10 +30,7 @@ import java.net.UnknownHostException; import java.nio.ByteBuffer; import java.nio.charset.StandardCharsets; -import java.security.AccessControlContext; -import java.security.AccessController; -import java.security.KeyStore; -import java.security.SecureRandom; +import java.security.*; import java.sql.*; import java.util.*; import java.util.Map.Entry; @@ -43,6 +40,7 @@ import javax.net.ssl.TrustManagerFactory; import javax.security.auth.Subject; import javax.security.sasl.Sasl; +import org.apache.commons.lang3.ClassUtils; import org.apache.commons.lang3.StringUtils; import org.apache.hive.service.rpc.thrift.*; import org.apache.http.HttpRequestInterceptor; @@ -109,6 +107,7 @@ public class KyuubiConnection implements SQLConnection, KyuubiLoggable { private String engineId = ""; private String engineName = ""; private String engineUrl = ""; + private String engineRefId = ""; private boolean isBeeLineMode; @@ -738,6 +737,7 @@ private void openSession() throws SQLException { } catch (UnknownHostException e) { LOG.debug("Error getting Kyuubi session local client ip address", e); } + openConf.put(Utils.KYUUBI_CLIENT_VERSION_KEY, Utils.getVersion()); openReq.setConfiguration(openConf); // Store the user name in the open request in case no non-sasl authentication @@ -812,11 +812,16 @@ private boolean isSaslAuthMode() { return !AUTH_SIMPLE.equalsIgnoreCase(sessConfMap.get(AUTH_TYPE)); } - private boolean isFromSubjectAuthMode() { - return isSaslAuthMode() - && hasSessionValue(AUTH_PRINCIPAL) - && AUTH_KERBEROS_AUTH_TYPE_FROM_SUBJECT.equalsIgnoreCase( - sessConfMap.get(AUTH_KERBEROS_AUTH_TYPE)); + private boolean isHadoopUserGroupInformationDoAs() { + try { + @SuppressWarnings("unchecked") + Class HadoopUserClz = + (Class) ClassUtils.getClass("org.apache.hadoop.security.User"); + Subject subject = Subject.getSubject(AccessController.getContext()); + return subject != null && !subject.getPrincipals(HadoopUserClz).isEmpty(); + } catch (ClassNotFoundException e) { + return false; + } } private boolean isKeytabAuthMode() { @@ -826,6 +831,16 @@ && hasSessionValue(AUTH_KYUUBI_CLIENT_PRINCIPAL) && hasSessionValue(AUTH_KYUUBI_CLIENT_KEYTAB); } + private boolean isFromSubjectAuthMode() { + return isSaslAuthMode() + && hasSessionValue(AUTH_PRINCIPAL) + && !hasSessionValue(AUTH_KYUUBI_CLIENT_PRINCIPAL) + && !hasSessionValue(AUTH_KYUUBI_CLIENT_KEYTAB) + && (AUTH_KERBEROS_AUTH_TYPE_FROM_SUBJECT.equalsIgnoreCase( + sessConfMap.get(AUTH_KERBEROS_AUTH_TYPE)) + || isHadoopUserGroupInformationDoAs()); + } + private boolean isTgtCacheAuthMode() { return isSaslAuthMode() && hasSessionValue(AUTH_PRINCIPAL) @@ -842,15 +857,15 @@ private boolean isKerberosAuthMode() { } private Subject createSubject() { - if (isFromSubjectAuthMode()) { + if (isKeytabAuthMode()) { + String principal = sessConfMap.get(AUTH_KYUUBI_CLIENT_PRINCIPAL); + String keytab = sessConfMap.get(AUTH_KYUUBI_CLIENT_KEYTAB); + return KerberosAuthenticationManager.getKeytabAuthentication(principal, keytab).getSubject(); + } else if (isFromSubjectAuthMode()) { AccessControlContext context = AccessController.getContext(); return Subject.getSubject(context); } else if (isTgtCacheAuthMode()) { return KerberosAuthenticationManager.getTgtCacheAuthentication().getSubject(); - } else if (isKeytabAuthMode()) { - String principal = sessConfMap.get(AUTH_KYUUBI_CLIENT_PRINCIPAL); - String keytab = sessConfMap.get(AUTH_KYUUBI_CLIENT_KEYTAB); - return KerberosAuthenticationManager.getKeytabAuthentication(principal, keytab).getSubject(); } else { // This should never happen throw new IllegalArgumentException("Unsupported auth mode"); @@ -1248,6 +1263,7 @@ public TProtocolVersion getProtocol() { return protocol; } + @SuppressWarnings("rawtypes") public static TCLIService.Iface newSynchronizedClient(TCLIService.Iface client) { return (TCLIService.Iface) Proxy.newProxyInstance( @@ -1286,6 +1302,7 @@ public Object invoke(Object proxy, Method method, Object[] args) throws Throwabl } } + @SuppressWarnings("fallthrough") public void waitLaunchEngineToComplete() throws SQLException { if (launchEngineOpHandle == null) return; @@ -1354,6 +1371,8 @@ private void fetchLaunchEngineResult() { engineName = value; } else if ("url".equals(key)) { engineUrl = value; + } else if ("refId".equals(key)) { + engineRefId = value; } } } catch (Exception e) { @@ -1372,4 +1391,8 @@ public String getEngineName() { public String getEngineUrl() { return engineUrl; } + + public String getEngineRefId() { + return engineRefId; + } } diff --git a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/KyuubiPreparedStatement.java b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/KyuubiPreparedStatement.java index 43c2a030bc8..a0d4f3bfd25 100644 --- a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/KyuubiPreparedStatement.java +++ b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/KyuubiPreparedStatement.java @@ -26,9 +26,7 @@ import java.sql.Timestamp; import java.sql.Types; import java.text.MessageFormat; -import java.util.ArrayList; import java.util.HashMap; -import java.util.List; import java.util.Scanner; import org.apache.hive.service.rpc.thrift.TCLIService; import org.apache.hive.service.rpc.thrift.TSessionHandle; @@ -81,57 +79,7 @@ public int executeUpdate() throws SQLException { /** update the SQL string with parameters set by setXXX methods of {@link PreparedStatement} */ private String updateSql(final String sql, HashMap parameters) throws SQLException { - List parts = splitSqlStatement(sql); - - StringBuilder newSql = new StringBuilder(parts.get(0)); - for (int i = 1; i < parts.size(); i++) { - if (!parameters.containsKey(i)) { - throw new KyuubiSQLException("Parameter #" + i + " is unset"); - } - newSql.append(parameters.get(i)); - newSql.append(parts.get(i)); - } - return newSql.toString(); - } - - /** - * Splits the parametered sql statement at parameter boundaries. - * - *

              taking into account ' and \ escaping. - * - *

              output for: 'select 1 from ? where a = ?' ['select 1 from ',' where a = ',''] - */ - private List splitSqlStatement(String sql) { - List parts = new ArrayList<>(); - int apCount = 0; - int off = 0; - boolean skip = false; - - for (int i = 0; i < sql.length(); i++) { - char c = sql.charAt(i); - if (skip) { - skip = false; - continue; - } - switch (c) { - case '\'': - apCount++; - break; - case '\\': - skip = true; - break; - case '?': - if ((apCount & 1) == 0) { - parts.add(sql.substring(off, i)); - off = i + 1; - } - break; - default: - break; - } - } - parts.add(sql.substring(off, sql.length())); - return parts; + return Utils.updateSql(sql, parameters); } @Override diff --git a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/KyuubiStatement.java b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/KyuubiStatement.java index ee3dc71a97d..cbe32eca65e 100644 --- a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/KyuubiStatement.java +++ b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/KyuubiStatement.java @@ -37,6 +37,7 @@ public class KyuubiStatement implements SQLStatement, KyuubiLoggable { public static final Logger LOG = LoggerFactory.getLogger(KyuubiStatement.class.getName()); public static final int DEFAULT_FETCH_SIZE = 1000; public static final String DEFAULT_RESULT_FORMAT = "thrift"; + public static final String DEFAULT_ARROW_TIMESTAMP_AS_STRING = "false"; private final KyuubiConnection connection; private TCLIService.Iface client; private TOperationHandle stmtHandle = null; @@ -45,7 +46,8 @@ public class KyuubiStatement implements SQLStatement, KyuubiLoggable { private int fetchSize = DEFAULT_FETCH_SIZE; private boolean isScrollableResultset = false; private boolean isOperationComplete = false; - private Map properties = new HashMap<>(); + + private Map properties = new TreeMap<>(String.CASE_INSENSITIVE_ORDER); /** * We need to keep a reference to the result set to support the following: * statement.execute(String sql); @@ -210,9 +212,14 @@ private boolean executeWithConfOverlay(String sql, Map confOverl String resultFormat = properties.getOrDefault("__kyuubi_operation_result_format__", DEFAULT_RESULT_FORMAT); - LOG.info("kyuubi.operation.result.format: " + resultFormat); + LOG.debug("kyuubi.operation.result.format: {}", resultFormat); switch (resultFormat) { case "arrow": + boolean timestampAsString = + Boolean.parseBoolean( + properties.getOrDefault( + "__kyuubi_operation_result_arrow_timestampAsString__", + DEFAULT_ARROW_TIMESTAMP_AS_STRING)); resultSet = new KyuubiArrowQueryResultSet.Builder(this) .setClient(client) @@ -222,6 +229,7 @@ private boolean executeWithConfOverlay(String sql, Map confOverl .setFetchSize(fetchSize) .setScrollable(isScrollableResultset) .setSchema(columnNames, columnTypes, columnAttributes) + .setTimestampAsString(timestampAsString) .build(); break; default: @@ -267,9 +275,14 @@ public boolean executeAsync(String sql) throws SQLException { String resultFormat = properties.getOrDefault("__kyuubi_operation_result_format__", DEFAULT_RESULT_FORMAT); - LOG.info("kyuubi.operation.result.format: " + resultFormat); + LOG.debug("kyuubi.operation.result.format: {}", resultFormat); switch (resultFormat) { case "arrow": + boolean timestampAsString = + Boolean.parseBoolean( + properties.getOrDefault( + "__kyuubi_operation_result_arrow_timestampAsString__", + DEFAULT_ARROW_TIMESTAMP_AS_STRING)); resultSet = new KyuubiArrowQueryResultSet.Builder(this) .setClient(client) @@ -279,7 +292,9 @@ public boolean executeAsync(String sql) throws SQLException { .setFetchSize(fetchSize) .setScrollable(isScrollableResultset) .setSchema(columnNames, columnTypes, columnAttributes) + .setTimestampAsString(timestampAsString) .build(); + break; default: resultSet = new KyuubiQueryResultSet.Builder(this) diff --git a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/Utils.java b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/Utils.java index c5b197f13df..ac9b29664c0 100644 --- a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/Utils.java +++ b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/Utils.java @@ -22,10 +22,12 @@ import java.net.InetAddress; import java.net.URI; import java.net.UnknownHostException; +import java.sql.PreparedStatement; import java.sql.SQLException; import java.util.*; import java.util.regex.Matcher; import java.util.regex.Pattern; +import org.apache.commons.lang3.StringUtils; import org.apache.hive.service.rpc.thrift.TStatus; import org.apache.hive.service.rpc.thrift.TStatusCode; import org.slf4j.Logger; @@ -88,6 +90,62 @@ static void verifySuccess(TStatus status, boolean withInfo) throws SQLException throw new KyuubiSQLException(status); } + /** + * Splits the parametered sql statement at parameter boundaries. + * + *

              taking into account ' and \ escaping. + * + *

              output for: 'select 1 from ? where a = ?' ['select 1 from ',' where a = ',''] + */ + static List splitSqlStatement(String sql) { + List parts = new ArrayList<>(); + int apCount = 0; + int off = 0; + boolean skip = false; + + for (int i = 0; i < sql.length(); i++) { + char c = sql.charAt(i); + if (skip) { + skip = false; + continue; + } + switch (c) { + case '\'': + apCount++; + break; + case '\\': + skip = true; + break; + case '?': + if ((apCount & 1) == 0) { + parts.add(sql.substring(off, i)); + off = i + 1; + } + break; + default: + break; + } + } + parts.add(sql.substring(off, sql.length())); + return parts; + } + + /** update the SQL string with parameters set by setXXX methods of {@link PreparedStatement} */ + public static String updateSql(final String sql, HashMap parameters) + throws SQLException { + List parts = splitSqlStatement(sql); + + StringBuilder newSql = new StringBuilder(parts.get(0)); + for (int i = 1; i < parts.size(); i++) { + if (!parameters.containsKey(i)) { + throw new KyuubiSQLException("Parameter #" + i + " is unset"); + } + newSql.append(parameters.get(i)); + newSql.append(parts.get(i)); + } + return newSql.toString(); + } + public static JdbcConnectionParams parseURL(String uri) throws JdbcUriParseException, SQLException, ZooKeeperHiveClientException { return parseURL(uri, new Properties()); @@ -193,12 +251,20 @@ public static JdbcConnectionParams extractURLComponents(String uri, Properties i } } + Pattern confPattern = Pattern.compile("([^;]*)([^;]*);?"); + // parse hive conf settings String confStr = jdbcURI.getQuery(); if (confStr != null) { - Matcher confMatcher = pattern.matcher(confStr); + Matcher confMatcher = confPattern.matcher(confStr); while (confMatcher.find()) { - connParams.getHiveConfs().put(confMatcher.group(1), confMatcher.group(2)); + String connParam = confMatcher.group(1); + if (StringUtils.isNotBlank(connParam) && connParam.contains("=")) { + int symbolIndex = connParam.indexOf('='); + connParams + .getHiveConfs() + .put(connParam.substring(0, symbolIndex), connParam.substring(symbolIndex + 1)); + } } } @@ -477,4 +543,21 @@ public static String getCanonicalHostName(String hostName) { public static boolean isKyuubiOperationHint(String hint) { return KYUUBI_OPERATION_HINT_PATTERN.matcher(hint).matches(); } + + public static final String KYUUBI_CLIENT_VERSION_KEY = "kyuubi.client.version"; + private static String KYUUBI_CLIENT_VERSION; + + public static synchronized String getVersion() { + if (KYUUBI_CLIENT_VERSION == null) { + try { + Properties prop = new Properties(); + prop.load(Utils.class.getClassLoader().getResourceAsStream("version.properties")); + KYUUBI_CLIENT_VERSION = prop.getProperty(KYUUBI_CLIENT_VERSION_KEY, "unknown"); + } catch (Exception e) { + LOG.error("Error getting kyuubi client version", e); + KYUUBI_CLIENT_VERSION = "unknown"; + } + } + return KYUUBI_CLIENT_VERSION; + } } diff --git a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/ZooKeeperHiveClientHelper.java b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/ZooKeeperHiveClientHelper.java index 349fc8dfb6b..41fadfa2f68 100644 --- a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/ZooKeeperHiveClientHelper.java +++ b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/ZooKeeperHiveClientHelper.java @@ -17,6 +17,7 @@ package org.apache.kyuubi.jdbc.hive; +import com.google.common.annotations.VisibleForTesting; import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.List; @@ -32,12 +33,14 @@ class ZooKeeperHiveClientHelper { // Pattern for key1=value1;key2=value2 private static final Pattern kvPattern = Pattern.compile("([^=;]*)=([^;]*);?"); - private static String getZooKeeperNamespace(JdbcConnectionParams connParams) { + @VisibleForTesting + protected static String getZooKeeperNamespace(JdbcConnectionParams connParams) { String zooKeeperNamespace = connParams.getSessionVars().get(JdbcConnectionParams.ZOOKEEPER_NAMESPACE); if ((zooKeeperNamespace == null) || (zooKeeperNamespace.isEmpty())) { zooKeeperNamespace = JdbcConnectionParams.ZOOKEEPER_DEFAULT_NAMESPACE; } + zooKeeperNamespace = zooKeeperNamespace.replaceAll("^/+", "").replaceAll("/+$", ""); return zooKeeperNamespace; } diff --git a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/adapter/SQLCallableStatement.java b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/adapter/SQLCallableStatement.java index 9ebe07011f7..4e62a3b00a3 100644 --- a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/adapter/SQLCallableStatement.java +++ b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/adapter/SQLCallableStatement.java @@ -25,6 +25,7 @@ import java.util.Calendar; import java.util.Map; +@SuppressWarnings("deprecation") public interface SQLCallableStatement extends CallableStatement { @Override diff --git a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/adapter/SQLPreparedStatement.java b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/adapter/SQLPreparedStatement.java index 6bc9d383a1b..cbcaf2788e1 100644 --- a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/adapter/SQLPreparedStatement.java +++ b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/adapter/SQLPreparedStatement.java @@ -23,6 +23,7 @@ import java.sql.*; import java.util.Calendar; +@SuppressWarnings("deprecation") public interface SQLPreparedStatement extends PreparedStatement { @Override diff --git a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/adapter/SQLResultSet.java b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/adapter/SQLResultSet.java index 8523e4b8d64..70c8ff4fe57 100644 --- a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/adapter/SQLResultSet.java +++ b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/adapter/SQLResultSet.java @@ -25,6 +25,7 @@ import java.util.Calendar; import java.util.Map; +@SuppressWarnings("deprecation") public interface SQLResultSet extends ResultSet { @Override diff --git a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/arrow/ArrowColumnarBatchRow.java b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/arrow/ArrowColumnarBatchRow.java index 20ed55a1d62..373867069b4 100644 --- a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/arrow/ArrowColumnarBatchRow.java +++ b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/arrow/ArrowColumnarBatchRow.java @@ -19,6 +19,8 @@ import java.math.BigDecimal; import java.sql.Timestamp; +import java.time.LocalDateTime; +import org.apache.arrow.vector.util.DateUtility; import org.apache.hive.service.rpc.thrift.TTypeId; import org.apache.kyuubi.jdbc.hive.common.DateUtils; import org.apache.kyuubi.jdbc.hive.common.HiveIntervalDayTime; @@ -104,7 +106,11 @@ public Object getMap(int ordinal) { throw new UnsupportedOperationException(); } - public Object get(int ordinal, TTypeId dataType) { + public Object get(int ordinal, TTypeId dataType, String timeZone, boolean timestampAsString) { + long seconds; + long milliseconds; + long microseconds; + int nanos; switch (dataType) { case BOOLEAN_TYPE: return getBoolean(ordinal); @@ -127,13 +133,19 @@ public Object get(int ordinal, TTypeId dataType) { case STRING_TYPE: return getString(ordinal); case TIMESTAMP_TYPE: - return new Timestamp(getLong(ordinal) / 1000); + if (timestampAsString) { + return Timestamp.valueOf(getString(ordinal)); + } else { + LocalDateTime localDateTime = + DateUtility.getLocalDateTimeFromEpochMicro(getLong(ordinal), timeZone); + return Timestamp.valueOf(localDateTime); + } case DATE_TYPE: return DateUtils.internalToDate(getInt(ordinal)); case INTERVAL_DAY_TIME_TYPE: - long microseconds = getLong(ordinal); - long seconds = microseconds / 1000000; - int nanos = (int) (microseconds % 1000000) * 1000; + microseconds = getLong(ordinal); + seconds = microseconds / 1_000_000; + nanos = (int) (microseconds % 1_000_000) * 1_000; return new HiveIntervalDayTime(seconds, nanos); case INTERVAL_YEAR_MONTH_TYPE: return new HiveIntervalYearMonth(getInt(ordinal)); diff --git a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/arrow/ArrowUtils.java b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/arrow/ArrowUtils.java index 1d1587b5444..9a777d4c240 100644 --- a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/arrow/ArrowUtils.java +++ b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/arrow/ArrowUtils.java @@ -79,6 +79,8 @@ public static ArrowType toArrowType(TTypeId ttype, JdbcColumnAttributes jdbcColu if (jdbcColumnAttributes != null) { return ArrowType.Decimal.createDecimal( jdbcColumnAttributes.precision, jdbcColumnAttributes.scale, null); + } else { + throw new IllegalStateException("Missing precision and scale where it is mandatory."); } case DATE_TYPE: return new ArrowType.Date(DateUnit.DAY); diff --git a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/cli/ColumnBuffer.java b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/cli/ColumnBuffer.java index 0cdbb9a5e5d..e703cb1f00c 100644 --- a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/cli/ColumnBuffer.java +++ b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/cli/ColumnBuffer.java @@ -89,6 +89,7 @@ public ColumnBuffer(TColumn colValues) { } } + @SuppressWarnings("unchecked") public ColumnBuffer(TTypeId type, BitSet nulls, Object values) { this.type = type; this.nulls = nulls; diff --git a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/common/FastHiveDecimal.java b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/common/FastHiveDecimal.java index 09b46fe4911..41fb9f840b9 100644 --- a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/common/FastHiveDecimal.java +++ b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/common/FastHiveDecimal.java @@ -457,6 +457,7 @@ protected boolean fastScaleByPowerOfTen(int n, FastHiveDecimal fastResult) { fastSignum, fast0, fast1, fast2, fastIntegerDigitCount, fastScale, n, fastResult); } + @SuppressWarnings("deprecation") protected static String fastRoundingModeToString(int roundingMode) { String roundingModeString; switch (roundingMode) { diff --git a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/common/FastHiveDecimalImpl.java b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/common/FastHiveDecimalImpl.java index 619371cbfb4..d3dba0f7b7a 100644 --- a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/common/FastHiveDecimalImpl.java +++ b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/common/FastHiveDecimalImpl.java @@ -32,6 +32,7 @@ * vectorization to implement decimals by storing the fast0, fast1, and fast2 longs and the * fastSignum, fastScale, etc ints in the DecimalColumnVector class. */ +@SuppressWarnings("deprecation") public class FastHiveDecimalImpl extends FastHiveDecimal { /** @@ -9369,7 +9370,7 @@ public static String getStackTraceAsSingleLine(StackTraceElement[] stackTrace) { public static String displayBytes(byte[] bytes, int start, int length) { StringBuilder sb = new StringBuilder(); for (int i = start; i < start + length; i++) { - sb.append(String.format("\\%03d", (int) (bytes[i] & 0xff))); + sb.append(String.format("\\%03d", bytes[i] & 0xff)); } return sb.toString(); } diff --git a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/common/HiveDecimal.java b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/common/HiveDecimal.java index bd4906ec7df..b8faa2305ca 100644 --- a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/common/HiveDecimal.java +++ b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/common/HiveDecimal.java @@ -80,6 +80,7 @@ *

              The original V1 public methods and fields are annotated with @HiveDecimalVersionV1; new public * methods and fields are annotated with @HiveDecimalVersionV2. */ +@SuppressWarnings("deprecation") public final class HiveDecimal extends FastHiveDecimal implements Comparable { /* diff --git a/kyuubi-hive-jdbc/src/main/resources/version.properties b/kyuubi-hive-jdbc/src/main/resources/version.properties new file mode 100644 index 00000000000..82ae50cfbf6 --- /dev/null +++ b/kyuubi-hive-jdbc/src/main/resources/version.properties @@ -0,0 +1,18 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +kyuubi.client.version = ${project.version} diff --git a/kyuubi-hive-jdbc/src/test/java/org/apache/kyuubi/jdbc/hive/UtilsTest.java b/kyuubi-hive-jdbc/src/test/java/org/apache/kyuubi/jdbc/hive/UtilsTest.java index c890c873190..b01957b3e43 100644 --- a/kyuubi-hive-jdbc/src/test/java/org/apache/kyuubi/jdbc/hive/UtilsTest.java +++ b/kyuubi-hive-jdbc/src/test/java/org/apache/kyuubi/jdbc/hive/UtilsTest.java @@ -21,9 +21,15 @@ import static org.apache.kyuubi.jdbc.hive.Utils.extractURLComponents; import static org.junit.Assert.assertEquals; +import com.google.common.collect.ImmutableMap; +import java.io.UnsupportedEncodingException; +import java.net.URLEncoder; +import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.Collection; +import java.util.Map; import java.util.Properties; +import java.util.regex.Pattern; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; @@ -35,23 +41,76 @@ public class UtilsTest { private String expectedPort; private String expectedCatalog; private String expectedDb; + private Map expectedHiveConf; private String uri; @Parameterized.Parameters - public static Collection data() { + public static Collection data() throws UnsupportedEncodingException { return Arrays.asList( - new String[][] { - {"localhost", "10009", null, "db", "jdbc:hive2:///db;k1=v1?k2=v2#k3=v3"}, - {"localhost", "10009", null, "default", "jdbc:hive2:///"}, - {"localhost", "10009", null, "default", "jdbc:kyuubi://"}, - {"localhost", "10009", null, "default", "jdbc:hive2://"}, - {"hostname", "10018", null, "db", "jdbc:hive2://hostname:10018/db;k1=v1?k2=v2#k3=v3"}, + new Object[][] { + { + "localhost", + "10009", + null, + "db", + new ImmutableMap.Builder().put("k2", "v2").build(), + "jdbc:hive2:///db;k1=v1?k2=v2#k3=v3" + }, + { + "localhost", + "10009", + null, + "default", + new ImmutableMap.Builder().build(), + "jdbc:hive2:///" + }, + { + "localhost", + "10009", + null, + "default", + new ImmutableMap.Builder().build(), + "jdbc:kyuubi://" + }, + { + "localhost", + "10009", + null, + "default", + new ImmutableMap.Builder().build(), + "jdbc:hive2://" + }, + { + "hostname", + "10018", + null, + "db", + new ImmutableMap.Builder().put("k2", "v2").build(), + "jdbc:hive2://hostname:10018/db;k1=v1?k2=v2#k3=v3" + }, { "hostname", "10018", "catalog", "db", + new ImmutableMap.Builder().put("k2", "v2").build(), "jdbc:hive2://hostname:10018/catalog/db;k1=v1?k2=v2#k3=v3" + }, + { + "hostname", + "10018", + "catalog", + "db", + new ImmutableMap.Builder() + .put("k2", "v2") + .put("k3", "-Xmx2g -XX:+PrintGCDetails -XX:HeapDumpPath=/heap.hprof") + .build(), + "jdbc:hive2://hostname:10018/catalog/db;k1=v1?" + + URLEncoder.encode( + "k2=v2;k3=-Xmx2g -XX:+PrintGCDetails -XX:HeapDumpPath=/heap.hprof", + StandardCharsets.UTF_8.toString()) + .replaceAll("\\+", "%20") + + "#k4=v4" } }); } @@ -61,11 +120,13 @@ public UtilsTest( String expectedPort, String expectedCatalog, String expectedDb, + Map expectedHiveConf, String uri) { this.expectedHost = expectedHost; this.expectedPort = expectedPort; this.expectedCatalog = expectedCatalog; this.expectedDb = expectedDb; + this.expectedHiveConf = expectedHiveConf; this.uri = uri; } @@ -76,5 +137,12 @@ public void testExtractURLComponents() throws JdbcUriParseException { assertEquals(Integer.parseInt(expectedPort), jdbcConnectionParams1.getPort()); assertEquals(expectedCatalog, jdbcConnectionParams1.getCatalogName()); assertEquals(expectedDb, jdbcConnectionParams1.getDbName()); + assertEquals(expectedHiveConf, jdbcConnectionParams1.getHiveConfs()); + } + + @Test + public void testGetVersion() { + Pattern pattern = Pattern.compile("^\\d+\\.\\d+\\.\\d+.*"); + assert pattern.matcher(Utils.getVersion()).matches(); } } diff --git a/kyuubi-hive-jdbc/src/test/java/org/apache/kyuubi/jdbc/hive/ZooKeeperHiveClientHelperTest.java b/kyuubi-hive-jdbc/src/test/java/org/apache/kyuubi/jdbc/hive/ZooKeeperHiveClientHelperTest.java new file mode 100644 index 00000000000..d1fd78f473e --- /dev/null +++ b/kyuubi-hive-jdbc/src/test/java/org/apache/kyuubi/jdbc/hive/ZooKeeperHiveClientHelperTest.java @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.jdbc.hive; + +import static org.apache.kyuubi.jdbc.hive.Utils.extractURLComponents; +import static org.junit.Assert.assertEquals; + +import java.util.Arrays; +import java.util.Collection; +import java.util.Properties; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class ZooKeeperHiveClientHelperTest { + + private String uri; + + @Parameterized.Parameters + public static Collection data() { + return Arrays.asList( + new String[][] { + {"jdbc:hive2://hostname:10018/db;zooKeeperNamespace=zookeeper/namespace"}, + {"jdbc:hive2://hostname:10018/db;zooKeeperNamespace=/zookeeper/namespace"}, + {"jdbc:hive2://hostname:10018/db;zooKeeperNamespace=zookeeper/namespace/"}, + {"jdbc:hive2://hostname:10018/db;zooKeeperNamespace=/zookeeper/namespace/"}, + {"jdbc:hive2://hostname:10018/db;zooKeeperNamespace=///zookeeper/namespace///"} + }); + } + + public ZooKeeperHiveClientHelperTest(String uri) { + this.uri = uri; + } + + @Test + public void testGetZooKeeperNamespace() throws JdbcUriParseException { + JdbcConnectionParams jdbcConnectionParams = extractURLComponents(uri, new Properties()); + assertEquals( + "zookeeper/namespace", + ZooKeeperHiveClientHelper.getZooKeeperNamespace(jdbcConnectionParams)); + } +} diff --git a/kyuubi-metrics/pom.xml b/kyuubi-metrics/pom.xml index b8ba40f4762..2edeb73c7ce 100644 --- a/kyuubi-metrics/pom.xml +++ b/kyuubi-metrics/pom.xml @@ -21,7 +21,7 @@ org.apache.kyuubi kyuubi-parent - 1.7.0-SNAPSHOT + 1.8.0-SNAPSHOT kyuubi-metrics_2.12 diff --git a/kyuubi-metrics/src/main/scala/org/apache/kyuubi/metrics/JsonReporterService.scala b/kyuubi-metrics/src/main/scala/org/apache/kyuubi/metrics/JsonReporterService.scala index cb0ef740431..7b172fc1eb9 100644 --- a/kyuubi-metrics/src/main/scala/org/apache/kyuubi/metrics/JsonReporterService.scala +++ b/kyuubi-metrics/src/main/scala/org/apache/kyuubi/metrics/JsonReporterService.scala @@ -65,7 +65,7 @@ class JsonReporterService(registry: MetricRegistry) Files.setPosixFilePermissions(tmpPath, PosixFilePermissions.fromString("rwxr--r--")) Files.move(tmpPath, reportPath, StandardCopyOption.REPLACE_EXISTING) } catch { - case NonFatal(e) => error("Error writing metrics to json file" + reportPath, e) + case NonFatal(e) => error(s"Error writing metrics to json file: $reportPath", e) } finally { if (writer != null) writer.close() } diff --git a/kyuubi-metrics/src/main/scala/org/apache/kyuubi/metrics/MetricsConf.scala b/kyuubi-metrics/src/main/scala/org/apache/kyuubi/metrics/MetricsConf.scala index cacc15b185e..ad734ced5d7 100644 --- a/kyuubi-metrics/src/main/scala/org/apache/kyuubi/metrics/MetricsConf.scala +++ b/kyuubi-metrics/src/main/scala/org/apache/kyuubi/metrics/MetricsConf.scala @@ -19,13 +19,12 @@ package org.apache.kyuubi.metrics import java.time.Duration -import org.apache.kyuubi.config.{ConfigBuilder, ConfigEntry, KyuubiConf} +import org.apache.kyuubi.config.ConfigEntry +import org.apache.kyuubi.config.KyuubiConf.buildConf import org.apache.kyuubi.metrics.ReporterType._ object MetricsConf { - private def buildConf(key: String): ConfigBuilder = KyuubiConf.buildConf(key) - val METRICS_ENABLED: ConfigEntry[Boolean] = buildConf("kyuubi.metrics.enabled") .doc("Set to true to enable kyuubi metrics system") @@ -34,12 +33,12 @@ object MetricsConf { .createWithDefault(true) val METRICS_REPORTERS: ConfigEntry[Seq[String]] = buildConf("kyuubi.metrics.reporters") - .doc("A comma separated list for all metrics reporters" + + .doc("A comma-separated list for all metrics reporters" + "

                " + "
              • CONSOLE - ConsoleReporter which outputs measurements to CONSOLE periodically.
              • " + "
              • JMX - JmxReporter which listens for new metrics and exposes them as MBeans.
              • " + "
              • JSON - JsonReporter which outputs measurements to json file periodically.
              • " + - "
              • PROMETHEUS - PrometheusReporter which exposes metrics in prometheus format.
              • " + + "
              • PROMETHEUS - PrometheusReporter which exposes metrics in Prometheus format.
              • " + "
              • SLF4J - Slf4jReporter which outputs measurements to system log periodically.
              • " + "
              ") .version("1.2.0") @@ -58,13 +57,13 @@ object MetricsConf { .createWithDefault(Duration.ofSeconds(5).toMillis) val METRICS_JSON_LOCATION: ConfigEntry[String] = buildConf("kyuubi.metrics.json.location") - .doc("Where the json metrics file located") + .doc("Where the JSON metrics file located") .version("1.2.0") .stringConf .createWithDefault("metrics") val METRICS_JSON_INTERVAL: ConfigEntry[Long] = buildConf("kyuubi.metrics.json.interval") - .doc("How often should report metrics to json file") + .doc("How often should report metrics to JSON file") .version("1.2.0") .timeConf .createWithDefault(Duration.ofSeconds(5).toMillis) diff --git a/kyuubi-metrics/src/main/scala/org/apache/kyuubi/metrics/MetricsConstants.scala b/kyuubi-metrics/src/main/scala/org/apache/kyuubi/metrics/MetricsConstants.scala index 3f967abe6e6..e97fd28ea25 100644 --- a/kyuubi-metrics/src/main/scala/org/apache/kyuubi/metrics/MetricsConstants.scala +++ b/kyuubi-metrics/src/main/scala/org/apache/kyuubi/metrics/MetricsConstants.scala @@ -29,6 +29,7 @@ object MetricsConstants { final val EXEC_POOL_ALIVE: String = KYUUBI + "exec.pool.threads.alive" final val EXEC_POOL_ACTIVE: String = KYUUBI + "exec.pool.threads.active" + final val EXEC_POOL_WORK_QUEUE_SIZE: String = KYUUBI + "exec.pool.work_queue.size" final private val CONN = KYUUBI + "connection." final private val THRIFT_HTTP_CONN = KYUUBI + "thrift.http.connection." @@ -61,6 +62,7 @@ object MetricsConstants { final val OPERATION_FAIL: String = OPERATION + "failed" final val OPERATION_TOTAL: String = OPERATION + "total" final val OPERATION_STATE: String = OPERATION + "state" + final val OPERATION_EXEC_TIME: String = OPERATION + "exec_time" final private val BACKEND_SERVICE = KYUUBI + "backend_service." final val BS_FETCH_LOG_ROWS_RATE = BACKEND_SERVICE + "fetch_log_rows_rate" diff --git a/kyuubi-metrics/src/main/scala/org/apache/kyuubi/metrics/MetricsSystem.scala b/kyuubi-metrics/src/main/scala/org/apache/kyuubi/metrics/MetricsSystem.scala index 2507eb77387..99da1f1b06e 100644 --- a/kyuubi-metrics/src/main/scala/org/apache/kyuubi/metrics/MetricsSystem.scala +++ b/kyuubi-metrics/src/main/scala/org/apache/kyuubi/metrics/MetricsSystem.scala @@ -20,7 +20,7 @@ package org.apache.kyuubi.metrics import java.lang.management.ManagementFactory import java.util.concurrent.TimeUnit -import com.codahale.metrics.{Gauge, MetricRegistry} +import com.codahale.metrics.{Gauge, MetricRegistry, Snapshot} import com.codahale.metrics.jvm._ import org.apache.kyuubi.config.KyuubiConf @@ -121,4 +121,8 @@ object MetricsSystem { def meterValue(name: String): Option[Long] = { maybeSystem.map(_.registry.meter(name).getCount) } + + def histogramSnapshot(name: String): Option[Snapshot] = { + maybeSystem.map(_.registry.histogram(name).getSnapshot) + } } diff --git a/kyuubi-rest-client/pom.xml b/kyuubi-rest-client/pom.xml index 6e07b6267c4..a9ceb9bb3cb 100644 --- a/kyuubi-rest-client/pom.xml +++ b/kyuubi-rest-client/pom.xml @@ -21,7 +21,7 @@ org.apache.kyuubi kyuubi-parent - 1.7.0-SNAPSHOT + 1.8.0-SNAPSHOT kyuubi-rest-client @@ -59,6 +59,11 @@ httpclient + + org.apache.httpcomponents + httpmime + + org.apache.hadoop hadoop-client-api @@ -77,6 +82,11 @@ slf4j-api + + org.slf4j + jcl-over-slf4j + + org.apache.logging.log4j log4j-slf4j-impl @@ -110,6 +120,13 @@ + + + true + src/main/resources + + + net.alchim31.maven diff --git a/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/AdminRestApi.java b/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/AdminRestApi.java index da9782df5b1..c81af593ae4 100644 --- a/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/AdminRestApi.java +++ b/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/AdminRestApi.java @@ -22,6 +22,8 @@ import java.util.List; import java.util.Map; import org.apache.kyuubi.client.api.v1.dto.Engine; +import org.apache.kyuubi.client.api.v1.dto.OperationData; +import org.apache.kyuubi.client.api.v1.dto.SessionData; public class AdminRestApi { private KyuubiRestClient client; @@ -44,6 +46,11 @@ public String refreshUserDefaultsConf() { return this.getClient().post(path, null, client.getAuthHeader()); } + public String refreshUnlimitedUsers() { + String path = String.format("%s/%s", API_BASE_PATH, "refresh/unlimited_users"); + return this.getClient().post(path, null, client.getAuthHeader()); + } + public String deleteEngine( String engineType, String shareLevel, String subdomain, String hs2ProxyUser) { Map params = new HashMap<>(); @@ -67,6 +74,31 @@ public List listEngines( return Arrays.asList(result); } + public List listSessions() { + SessionData[] result = + this.getClient() + .get(API_BASE_PATH + "/sessions", null, SessionData[].class, client.getAuthHeader()); + return Arrays.asList(result); + } + + public String closeSession(String sessionHandleStr) { + String url = String.format("%s/sessions/%s", API_BASE_PATH, sessionHandleStr); + return this.getClient().delete(url, null, client.getAuthHeader()); + } + + public List listOperations() { + OperationData[] result = + this.getClient() + .get( + API_BASE_PATH + "/operations", null, OperationData[].class, client.getAuthHeader()); + return Arrays.asList(result); + } + + public String closeOperation(String operationHandleStr) { + String url = String.format("%s/operations/%s", API_BASE_PATH, operationHandleStr); + return this.getClient().delete(url, null, client.getAuthHeader()); + } + private IRestClient getClient() { return this.client.getHttpClient(); } diff --git a/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/BatchRestApi.java b/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/BatchRestApi.java index a5f27590eb6..f5099568b21 100644 --- a/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/BatchRestApi.java +++ b/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/BatchRestApi.java @@ -17,14 +17,12 @@ package org.apache.kyuubi.client; +import java.io.File; import java.util.HashMap; import java.util.Map; -import org.apache.kyuubi.client.api.v1.dto.Batch; -import org.apache.kyuubi.client.api.v1.dto.BatchRequest; -import org.apache.kyuubi.client.api.v1.dto.CloseBatchResponse; -import org.apache.kyuubi.client.api.v1.dto.GetBatchesResponse; -import org.apache.kyuubi.client.api.v1.dto.OperationLog; +import org.apache.kyuubi.client.api.v1.dto.*; import org.apache.kyuubi.client.util.JsonUtils; +import org.apache.kyuubi.client.util.VersionUtils; public class BatchRestApi { @@ -39,10 +37,19 @@ public BatchRestApi(KyuubiRestClient client) { } public Batch createBatch(BatchRequest request) { + setClientVersion(request); String requestBody = JsonUtils.toJson(request); return this.getClient().post(API_BASE_PATH, requestBody, Batch.class, client.getAuthHeader()); } + public Batch createBatch(BatchRequest request, File resourceFile) { + setClientVersion(request); + Map multiPartMap = new HashMap<>(); + multiPartMap.put("batchRequest", new MultiPart(MultiPart.MultiPartType.JSON, request)); + multiPartMap.put("resourceFile", new MultiPart(MultiPart.MultiPartType.FILE, resourceFile)); + return this.getClient().post(API_BASE_PATH, multiPartMap, Batch.class, client.getAuthHeader()); + } + public Batch getBatchById(String batchId) { String path = String.format("%s/%s", API_BASE_PATH, batchId); return this.getClient().get(path, null, Batch.class, client.getAuthHeader()); @@ -92,4 +99,13 @@ public CloseBatchResponse deleteBatch(String batchId, String hs2ProxyUser) { private IRestClient getClient() { return this.client.getHttpClient(); } + + private void setClientVersion(BatchRequest request) { + if (request != null) { + Map newConf = new HashMap<>(); + newConf.putAll(request.getConf()); + newConf.put(VersionUtils.KYUUBI_CLIENT_VERSION_KEY, VersionUtils.getVersion()); + request.setConf(newConf); + } + } } diff --git a/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/IRestClient.java b/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/IRestClient.java index 50436ef736b..0eaffebd246 100644 --- a/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/IRestClient.java +++ b/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/IRestClient.java @@ -18,6 +18,7 @@ package org.apache.kyuubi.client; import java.util.Map; +import org.apache.kyuubi.client.api.v1.dto.MultiPart; /** A underlying http client interface for common rest request. */ public interface IRestClient extends AutoCloseable { @@ -27,8 +28,14 @@ public interface IRestClient extends AutoCloseable { T post(String path, String body, Class type, String authHeader); + T post(String path, Map multiPartMap, Class type, String authHeader); + String post(String path, String body, String authHeader); + T put(String path, String body, Class type, String authHeader); + + String put(String path, String body, String authHeader); + T delete(String path, Map params, Class type, String authHeader); String delete(String path, Map params, String authHeader); diff --git a/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/KyuubiRestClient.java b/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/KyuubiRestClient.java index a6079e9e0fe..dbcc89b16d3 100644 --- a/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/KyuubiRestClient.java +++ b/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/KyuubiRestClient.java @@ -165,13 +165,16 @@ public static class Builder { private String password; - private int socketTimeout = 3000; + // 2 minutes + private int socketTimeout = 2 * 60 * 1000; - private int connectTimeout = 3000; + // 30s + private int connectTimeout = 30 * 1000; private int maxAttempts = 3; - private int attemptWaitTime = 3000; + // 3s + private int attemptWaitTime = 3 * 1000; public Builder(String hostUrl) { if (StringUtils.isBlank(hostUrl)) { diff --git a/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/RestClient.java b/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/RestClient.java index 7b93f559ed7..6447d547765 100644 --- a/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/RestClient.java +++ b/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/RestClient.java @@ -17,6 +17,7 @@ package org.apache.kyuubi.client; +import java.io.File; import java.net.ConnectException; import java.net.URI; import java.net.URISyntaxException; @@ -26,15 +27,22 @@ import org.apache.commons.lang3.StringUtils; import org.apache.http.HttpEntity; import org.apache.http.HttpHeaders; +import org.apache.http.NoHttpResponseException; import org.apache.http.client.HttpResponseException; import org.apache.http.client.ResponseHandler; import org.apache.http.client.methods.HttpUriRequest; import org.apache.http.client.methods.RequestBuilder; import org.apache.http.client.utils.URIBuilder; import org.apache.http.conn.ConnectTimeoutException; +import org.apache.http.entity.ContentType; import org.apache.http.entity.StringEntity; +import org.apache.http.entity.mime.MultipartEntityBuilder; +import org.apache.http.entity.mime.content.ContentBody; +import org.apache.http.entity.mime.content.FileBody; +import org.apache.http.entity.mime.content.StringBody; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.util.EntityUtils; +import org.apache.kyuubi.client.api.v1.dto.MultiPart; import org.apache.kyuubi.client.exception.KyuubiRestException; import org.apache.kyuubi.client.exception.RetryableKyuubiRestException; import org.apache.kyuubi.client.util.JsonUtils; @@ -87,6 +95,52 @@ public String post(String path, String body, String authHeader) { return doRequest(buildURI(path), authHeader, postRequestBuilder); } + @Override + public T post( + String path, Map multiPartMap, Class type, String authHeader) { + MultipartEntityBuilder entityBuilder = + MultipartEntityBuilder.create().setCharset(StandardCharsets.UTF_8); + multiPartMap.forEach( + (s, multiPart) -> { + ContentBody contentBody; + Object payload = multiPart.getPayload(); + switch (multiPart.getType()) { + case JSON: + String string = + (payload instanceof String) ? (String) payload : JsonUtils.toJson(payload); + contentBody = new StringBody(string, ContentType.APPLICATION_JSON); + break; + case FILE: + contentBody = new FileBody((File) payload); + break; + default: + throw new RuntimeException("Unsupported multi part type:" + multiPart); + } + entityBuilder.addPart(s, contentBody); + }); + HttpEntity httpEntity = entityBuilder.build(); + RequestBuilder postRequestBuilder = RequestBuilder.post(buildURI(path)); + postRequestBuilder.setHeader(httpEntity.getContentType()); + postRequestBuilder.setEntity(httpEntity); + String responseJson = doRequest(buildURI(path), authHeader, postRequestBuilder); + return JsonUtils.fromJson(responseJson, type); + } + + @Override + public T put(String path, String body, Class type, String authHeader) { + String responseJson = put(path, body, authHeader); + return JsonUtils.fromJson(responseJson, type); + } + + @Override + public String put(String path, String body, String authHeader) { + RequestBuilder putRequestBuilder = RequestBuilder.put(); + if (body != null) { + putRequestBuilder.setEntity(new StringEntity(body, StandardCharsets.UTF_8)); + } + return doRequest(buildURI(path), authHeader, putRequestBuilder); + } + @Override public T delete(String path, Map params, Class type, String authHeader) { String responseJson = delete(path, params, authHeader); @@ -101,14 +155,14 @@ public String delete(String path, Map params, String authHeader) private String doRequest(URI uri, String authHeader, RequestBuilder requestBuilder) { String response; try { + if (requestBuilder.getFirstHeader(HttpHeaders.CONTENT_TYPE) == null) { + requestBuilder.setHeader( + HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.getMimeType()); + } if (StringUtils.isNotBlank(authHeader)) { requestBuilder.setHeader(HttpHeaders.AUTHORIZATION, authHeader); } - HttpUriRequest httpRequest = - requestBuilder - .setUri(uri) - .setHeader(HttpHeaders.CONTENT_TYPE, "application/json") - .build(); + HttpUriRequest httpRequest = requestBuilder.setUri(uri).build(); LOG.debug("Executing {} request: {}", httpRequest.getMethod(), uri); @@ -126,7 +180,7 @@ private String doRequest(URI uri, String authHeader, RequestBuilder requestBuild response = httpclient.execute(httpRequest, responseHandler); LOG.debug("Response: {}", response); - } catch (ConnectException | ConnectTimeoutException e) { + } catch (ConnectException | ConnectTimeoutException | NoHttpResponseException e) { // net exception can be retried by connecting to other Kyuubi server throw new RetryableKyuubiRestException("Api request failed for " + uri.toString(), e); } catch (KyuubiRestException rethrow) { diff --git a/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/RetryableRestClient.java b/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/RetryableRestClient.java index 6dd378a9ab0..dcd052acae4 100644 --- a/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/RetryableRestClient.java +++ b/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/RetryableRestClient.java @@ -48,6 +48,7 @@ private RetryableRestClient(List uris, RestClientConf conf) { newRestClient(); } + @SuppressWarnings("rawtypes") public static IRestClient getRestClient(List uris, RestClientConf conf) { RetryableRestClient client = new RetryableRestClient(uris, conf); return (IRestClient) diff --git a/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/SessionRestApi.java b/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/SessionRestApi.java index fbb424102db..a4c3bb7ab24 100644 --- a/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/SessionRestApi.java +++ b/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/SessionRestApi.java @@ -20,7 +20,8 @@ import java.util.Arrays; import java.util.HashMap; import java.util.List; -import org.apache.kyuubi.client.api.v1.dto.SessionData; +import org.apache.kyuubi.client.api.v1.dto.*; +import org.apache.kyuubi.client.util.JsonUtils; public class SessionRestApi { @@ -41,6 +42,102 @@ public List listSessions() { return Arrays.asList(result); } + public SessionHandle openSession(SessionOpenRequest sessionOpenRequest) { + return this.getClient() + .post( + API_BASE_PATH, + JsonUtils.toJson(sessionOpenRequest), + SessionHandle.class, + client.getAuthHeader()); + } + + public String closeSession(String sessionHandleStr) { + String path = String.format("%s/%s", API_BASE_PATH, sessionHandleStr); + return this.getClient().delete(path, new HashMap<>(), client.getAuthHeader()); + } + + public KyuubiSessionEvent getSessionEvent(String sessionHandleStr) { + String path = String.format("%s/%s", API_BASE_PATH, sessionHandleStr); + return this.getClient() + .get(path, new HashMap<>(), KyuubiSessionEvent.class, client.getAuthHeader()); + } + + public InfoDetail getSessionInfo(String sessionHandleStr, int infoType) { + String path = String.format("%s/%s/info/%s", API_BASE_PATH, sessionHandleStr, infoType); + return this.getClient().get(path, new HashMap<>(), InfoDetail.class, client.getAuthHeader()); + } + + public int getOpenSessionCount() { + String path = String.format("%s/count", API_BASE_PATH); + return this.getClient() + .get(path, new HashMap<>(), SessionOpenCount.class, client.getAuthHeader()) + .getOpenSessionCount(); + } + + public ExecPoolStatistic getExecPoolStatistic() { + String path = String.format("%s/execPool/statistic", API_BASE_PATH); + return this.getClient() + .get(path, new HashMap<>(), ExecPoolStatistic.class, client.getAuthHeader()); + } + + public OperationHandle executeStatement(String sessionHandleStr, StatementRequest request) { + String path = String.format("%s/%s/operations/statement", API_BASE_PATH, sessionHandleStr); + return this.getClient() + .post(path, JsonUtils.toJson(request), OperationHandle.class, client.getAuthHeader()); + } + + public OperationHandle getTypeInfo(String sessionHandleStr) { + String path = String.format("%s/%s/operations/typeInfo", API_BASE_PATH, sessionHandleStr); + return this.getClient().post(path, "", OperationHandle.class, client.getAuthHeader()); + } + + public OperationHandle getCatalogs(String sessionHandleStr) { + String path = String.format("%s/%s/operations/catalogs", API_BASE_PATH, sessionHandleStr); + return this.getClient().post(path, "", OperationHandle.class, client.getAuthHeader()); + } + + public OperationHandle getSchemas(String sessionHandleStr, GetSchemasRequest request) { + String path = String.format("%s/%s/operations/schemas", API_BASE_PATH, sessionHandleStr); + return this.getClient() + .post(path, JsonUtils.toJson(request), OperationHandle.class, client.getAuthHeader()); + } + + public OperationHandle getTables(String sessionHandleStr, GetTablesRequest request) { + String path = String.format("%s/%s/operations/tables", API_BASE_PATH, sessionHandleStr); + return this.getClient() + .post(path, JsonUtils.toJson(request), OperationHandle.class, client.getAuthHeader()); + } + + public OperationHandle getTableTypes(String sessionHandleStr) { + String path = String.format("%s/%s/operations/tableTypes", API_BASE_PATH, sessionHandleStr); + return this.getClient().post(path, "", OperationHandle.class, client.getAuthHeader()); + } + + public OperationHandle getColumns(String sessionHandleStr, GetColumnsRequest request) { + String path = String.format("%s/%s/operations/columns", API_BASE_PATH, sessionHandleStr); + return this.getClient() + .post(path, JsonUtils.toJson(request), OperationHandle.class, client.getAuthHeader()); + } + + public OperationHandle getFunctions(String sessionHandleStr, GetFunctionsRequest request) { + String path = String.format("%s/%s/operations/functions", API_BASE_PATH, sessionHandleStr); + return this.getClient() + .post(path, JsonUtils.toJson(request), OperationHandle.class, client.getAuthHeader()); + } + + public OperationHandle getPrimaryKeys(String sessionHandleStr, GetPrimaryKeysRequest request) { + String path = String.format("%s/%s/operations/primaryKeys", API_BASE_PATH, sessionHandleStr); + return this.getClient() + .post(path, JsonUtils.toJson(request), OperationHandle.class, client.getAuthHeader()); + } + + public OperationHandle getCrossReference( + String sessionHandleStr, GetCrossReferenceRequest request) { + String path = String.format("%s/%s/operations/crossReference", API_BASE_PATH, sessionHandleStr); + return this.getClient() + .post(path, JsonUtils.toJson(request), OperationHandle.class, client.getAuthHeader()); + } + private IRestClient getClient() { return this.client.getHttpClient(); } diff --git a/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/api/v1/dto/Batch.java b/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/api/v1/dto/Batch.java index 43fbf10af58..b318b709d5e 100644 --- a/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/api/v1/dto/Batch.java +++ b/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/api/v1/dto/Batch.java @@ -17,6 +17,8 @@ package org.apache.kyuubi.client.api.v1.dto; +import java.util.Collections; +import java.util.Map; import java.util.Objects; import org.apache.commons.lang3.builder.ReflectionToStringBuilder; import org.apache.commons.lang3.builder.ToStringStyle; @@ -35,6 +37,7 @@ public class Batch { private String state; private long createTime; private long endTime; + private Map batchInfo = Collections.emptyMap(); public Batch() {} @@ -51,7 +54,8 @@ public Batch( String kyuubiInstance, String state, long createTime, - long endTime) { + long endTime, + Map batchInfo) { this.id = id; this.user = user; this.batchType = batchType; @@ -65,6 +69,7 @@ public Batch( this.state = state; this.createTime = createTime; this.endTime = endTime; + this.batchInfo = batchInfo; } public String getId() { @@ -171,6 +176,17 @@ public void setEndTime(long endTime) { this.endTime = endTime; } + public Map getBatchInfo() { + if (batchInfo == null) { + return Collections.emptyMap(); + } + return batchInfo; + } + + public void setBatchInfo(Map batchInfo) { + this.batchInfo = batchInfo; + } + @Override public boolean equals(Object o) { if (this == o) return true; diff --git a/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/api/v1/dto/BatchRequest.java b/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/api/v1/dto/BatchRequest.java index f10a8fdb5f2..f45821fc232 100644 --- a/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/api/v1/dto/BatchRequest.java +++ b/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/api/v1/dto/BatchRequest.java @@ -29,8 +29,8 @@ public class BatchRequest { private String resource; private String className; private String name; - private Map conf; - private List args; + private Map conf = Collections.emptyMap(); + private List args = Collections.emptyList(); public BatchRequest() {} @@ -54,8 +54,6 @@ public BatchRequest(String batchType, String resource, String className, String this.resource = resource; this.className = className; this.name = name; - this.conf = Collections.emptyMap(); - this.args = Collections.emptyList(); } public String getBatchType() { diff --git a/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/api/v1/dto/ExecPoolStatistic.java b/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/api/v1/dto/ExecPoolStatistic.java index ee8a9f0072c..a40811f92bf 100644 --- a/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/api/v1/dto/ExecPoolStatistic.java +++ b/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/api/v1/dto/ExecPoolStatistic.java @@ -24,12 +24,14 @@ public class ExecPoolStatistic { private int execPoolSize; private int execPoolActiveCount; + private int execPoolWorkQueueSize; public ExecPoolStatistic() {} - public ExecPoolStatistic(int execPoolSize, int execPoolActiveCount) { + public ExecPoolStatistic(int execPoolSize, int execPoolActiveCount, int execPoolWorkQueueSize) { this.execPoolSize = execPoolSize; this.execPoolActiveCount = execPoolActiveCount; + this.execPoolWorkQueueSize = execPoolWorkQueueSize; } public int getExecPoolSize() { @@ -48,18 +50,27 @@ public void setExecPoolActiveCount(int execPoolActiveCount) { this.execPoolActiveCount = execPoolActiveCount; } + public int getExecPoolWorkQueueSize() { + return execPoolWorkQueueSize; + } + + public void setExecPoolWorkQueueSize(int execPoolWorkQueueSize) { + this.execPoolWorkQueueSize = execPoolWorkQueueSize; + } + @Override public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; ExecPoolStatistic that = (ExecPoolStatistic) o; return getExecPoolSize() == that.getExecPoolSize() - && getExecPoolActiveCount() == that.getExecPoolActiveCount(); + && getExecPoolActiveCount() == that.getExecPoolActiveCount() + && getExecPoolWorkQueueSize() == that.getExecPoolWorkQueueSize(); } @Override public int hashCode() { - return Objects.hash(getExecPoolSize(), getExecPoolActiveCount()); + return Objects.hash(getExecPoolSize(), getExecPoolActiveCount(), getExecPoolWorkQueueSize()); } @Override diff --git a/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/api/v1/dto/KyuubiEvent.java b/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/api/v1/dto/KyuubiEvent.java new file mode 100644 index 00000000000..8de12508914 --- /dev/null +++ b/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/api/v1/dto/KyuubiEvent.java @@ -0,0 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.client.api.v1.dto; + +public interface KyuubiEvent {} diff --git a/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/api/v1/dto/KyuubiSessionEvent.java b/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/api/v1/dto/KyuubiSessionEvent.java new file mode 100644 index 00000000000..4c3cbcfd540 --- /dev/null +++ b/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/api/v1/dto/KyuubiSessionEvent.java @@ -0,0 +1,361 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.client.api.v1.dto; + +import java.util.Map; + +public class KyuubiSessionEvent implements KyuubiEvent { + + private String sessionId; + + private int clientVersion; + + private String sessionType; + + private String sessionName; + + private String remoteSessionId; + + private String engineId; + + private String user; + + private String clientIp; + + private String serverIp; + + private Map conf; + + private long eventTime; + + private long openedTime; + + private long startTime; + + private long endTime; + + private int totalOperations; + + private Throwable exception; + + public KyuubiSessionEvent() {} + + public KyuubiSessionEvent( + String sessionId, + int clientVersion, + String sessionType, + String sessionName, + String remoteSessionId, + String engineId, + String user, + String clientIp, + String serverIp, + Map conf, + long eventTime, + long openedTime, + long startTime, + long endTime, + int totalOperations, + Throwable exception) { + this.sessionId = sessionId; + this.clientVersion = clientVersion; + this.sessionType = sessionType; + this.sessionName = sessionName; + this.remoteSessionId = remoteSessionId; + this.engineId = engineId; + this.user = user; + this.clientIp = clientIp; + this.serverIp = serverIp; + this.conf = conf; + this.eventTime = eventTime; + this.openedTime = openedTime; + this.startTime = startTime; + this.endTime = endTime; + this.totalOperations = totalOperations; + this.exception = exception; + } + + public static KyuubiSessionEvent.KyuubiSessionEventBuilder builder() { + return new KyuubiSessionEvent.KyuubiSessionEventBuilder(); + } + + public static class KyuubiSessionEventBuilder { + private String sessionId; + + private int clientVersion; + + private String sessionType; + + private String sessionName; + + private String remoteSessionId; + + private String engineId; + + private String user; + + private String clientIp; + + private String serverIp; + + private Map conf; + + private long eventTime; + + private long openedTime; + + private long startTime; + + private long endTime; + + private int totalOperations; + + private Throwable exception; + + public KyuubiSessionEventBuilder() {} + + public KyuubiSessionEvent.KyuubiSessionEventBuilder sessionId(final String sessionId) { + this.sessionId = sessionId; + return this; + } + + public KyuubiSessionEvent.KyuubiSessionEventBuilder remoteSessionId( + final String remoteSessionId) { + this.remoteSessionId = remoteSessionId; + return this; + } + + public KyuubiSessionEvent.KyuubiSessionEventBuilder clientVersion(final int clientVersion) { + this.clientVersion = clientVersion; + return this; + } + + public KyuubiSessionEvent.KyuubiSessionEventBuilder sessionType(final String sessionType) { + this.sessionType = sessionType; + return this; + } + + public KyuubiSessionEvent.KyuubiSessionEventBuilder sessionName(final String sessionName) { + this.sessionName = sessionName; + return this; + } + + public KyuubiSessionEvent.KyuubiSessionEventBuilder engineId(final String engineId) { + this.engineId = engineId; + return this; + } + + public KyuubiSessionEvent.KyuubiSessionEventBuilder user(final String user) { + this.user = user; + return this; + } + + public KyuubiSessionEvent.KyuubiSessionEventBuilder clientIp(final String clientIp) { + this.clientIp = clientIp; + return this; + } + + public KyuubiSessionEvent.KyuubiSessionEventBuilder serverIp(final String serverIp) { + this.serverIp = serverIp; + return this; + } + + public KyuubiSessionEvent.KyuubiSessionEventBuilder conf(final Map conf) { + this.conf = conf; + return this; + } + + public KyuubiSessionEvent.KyuubiSessionEventBuilder eventTime(final long eventTime) { + this.eventTime = eventTime; + return this; + } + + public KyuubiSessionEvent.KyuubiSessionEventBuilder openedTime(final long openedTime) { + this.openedTime = openedTime; + return this; + } + + public KyuubiSessionEvent.KyuubiSessionEventBuilder startTime(final long startTime) { + this.startTime = startTime; + return this; + } + + public KyuubiSessionEvent.KyuubiSessionEventBuilder endTime(final long endTime) { + this.endTime = endTime; + return this; + } + + public KyuubiSessionEvent.KyuubiSessionEventBuilder totalOperations(final int totalOperations) { + this.totalOperations = totalOperations; + return this; + } + + public KyuubiSessionEvent.KyuubiSessionEventBuilder exception(final Throwable exception) { + this.exception = exception; + return this; + } + + public KyuubiSessionEvent build() { + return new KyuubiSessionEvent( + sessionId, + clientVersion, + sessionType, + sessionName, + remoteSessionId, + engineId, + user, + clientIp, + serverIp, + conf, + eventTime, + openedTime, + startTime, + endTime, + totalOperations, + exception); + } + } + + public String getSessionId() { + return sessionId; + } + + public void setSessionId(String sessionId) { + this.sessionId = sessionId; + } + + public int getClientVersion() { + return clientVersion; + } + + public void setClientVersion(int clientVersion) { + this.clientVersion = clientVersion; + } + + public String getSessionType() { + return sessionType; + } + + public void setSessionType(String sessionType) { + this.sessionType = sessionType; + } + + public String getSessionName() { + return sessionName; + } + + public void setSessionName(String sessionName) { + this.sessionName = sessionName; + } + + public String getRemoteSessionId() { + return remoteSessionId; + } + + public void setRemoteSessionId(String remoteSessionId) { + this.remoteSessionId = remoteSessionId; + } + + public String getEngineId() { + return engineId; + } + + public void setEngineId(String engineId) { + this.engineId = engineId; + } + + public String getUser() { + return user; + } + + public void setUser(String user) { + this.user = user; + } + + public String getClientIp() { + return clientIp; + } + + public void setClientIp(String clientIp) { + this.clientIp = clientIp; + } + + public String getServerIp() { + return serverIp; + } + + public void setServerIp(String serverIp) { + this.serverIp = serverIp; + } + + public Map getConf() { + return conf; + } + + public void setConf(Map conf) { + this.conf = conf; + } + + public long getEventTime() { + return eventTime; + } + + public void setEventTime(long eventTime) { + this.eventTime = eventTime; + } + + public long getOpenedTime() { + return openedTime; + } + + public void setOpenedTime(long openedTime) { + this.openedTime = openedTime; + } + + public long getStartTime() { + return startTime; + } + + public void setStartTime(long startTime) { + this.startTime = startTime; + } + + public long getEndTime() { + return endTime; + } + + public void setEndTime(long endTime) { + this.endTime = endTime; + } + + public int getTotalOperations() { + return totalOperations; + } + + public void setTotalOperations(int totalOperations) { + this.totalOperations = totalOperations; + } + + public Throwable getException() { + return exception; + } + + public void setException(Throwable exception) { + this.exception = exception; + } +} diff --git a/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/api/v1/dto/MultiPart.java b/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/api/v1/dto/MultiPart.java new file mode 100644 index 00000000000..997a629fec1 --- /dev/null +++ b/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/api/v1/dto/MultiPart.java @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.client.api.v1.dto; + +public class MultiPart { + private MultiPartType type; + private Object payload; + + public enum MultiPartType { + FILE, + JSON + } + + public MultiPart(MultiPartType type, Object obj) { + this.type = type; + this.payload = obj; + } + + public Object getPayload() { + return payload; + } + + public void setPayload(Object payload) { + this.payload = payload; + } + + public MultiPartType getType() { + return type; + } + + public void setType(MultiPartType type) { + this.type = type; + } +} diff --git a/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/api/v1/dto/OperationData.java b/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/api/v1/dto/OperationData.java new file mode 100644 index 00000000000..1b99bb2c690 --- /dev/null +++ b/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/api/v1/dto/OperationData.java @@ -0,0 +1,169 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.client.api.v1.dto; + +import java.util.Objects; +import org.apache.commons.lang3.builder.ReflectionToStringBuilder; +import org.apache.commons.lang3.builder.ToStringStyle; + +public class OperationData { + private String identifier; + private String statement; + private String state; + private Long createTime; + private Long startTime; + private Long completeTime; + private String exception; + private String sessionId; + private String sessionUser; + private String sessionType; + private String kyuubiInstance; + + public OperationData() {} + + public OperationData( + String identifier, + String statement, + String state, + Long createTime, + Long startTime, + Long completeTime, + String exception, + String sessionId, + String sessionUser, + String sessionType, + String kyuubiInstance) { + this.identifier = identifier; + this.statement = statement; + this.state = state; + this.createTime = createTime; + this.startTime = startTime; + this.completeTime = completeTime; + this.exception = exception; + this.sessionId = sessionId; + this.sessionUser = sessionUser; + this.sessionType = sessionType; + this.kyuubiInstance = kyuubiInstance; + } + + public String getIdentifier() { + return identifier; + } + + public void setIdentifier(String identifier) { + this.identifier = identifier; + } + + public String getStatement() { + return statement; + } + + public void setStatement(String statement) { + this.statement = statement; + } + + public String getState() { + return state; + } + + public void setState(String state) { + this.state = state; + } + + public Long getCreateTime() { + return createTime; + } + + public void setCreateTime(Long createTime) { + this.createTime = createTime; + } + + public Long getStartTime() { + return startTime; + } + + public void setStartTime(Long startTime) { + this.startTime = startTime; + } + + public Long getCompleteTime() { + return completeTime; + } + + public void setCompleteTime(Long completeTime) { + this.completeTime = completeTime; + } + + public String getException() { + return exception; + } + + public void setException(String exception) { + this.exception = exception; + } + + public String getSessionId() { + return sessionId; + } + + public void setSessionId(String sessionId) { + this.sessionId = sessionId; + } + + public String getSessionUser() { + return sessionUser; + } + + public void setSessionUser(String sessionUser) { + this.sessionUser = sessionUser; + } + + public String getSessionType() { + return sessionType; + } + + public void setSessionType(String sessionType) { + this.sessionType = sessionType; + } + + public String getKyuubiInstance() { + return kyuubiInstance; + } + + public void setKyuubiInstance(String kyuubiInstance) { + this.kyuubiInstance = kyuubiInstance; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + SessionData that = (SessionData) o; + return Objects.equals(getIdentifier(), that.getIdentifier()); + } + + @Override + public int hashCode() { + return Objects.hash(getIdentifier()); + } + + @Override + public String toString() { + return ReflectionToStringBuilder.toString(this, ToStringStyle.JSON_STYLE); + } +} diff --git a/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/api/v1/dto/OperationHandle.java b/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/api/v1/dto/OperationHandle.java new file mode 100644 index 00000000000..394e6c157c7 --- /dev/null +++ b/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/api/v1/dto/OperationHandle.java @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.client.api.v1.dto; + +import java.util.Objects; +import java.util.UUID; +import org.apache.commons.lang3.builder.ReflectionToStringBuilder; +import org.apache.commons.lang3.builder.ToStringStyle; + +public class OperationHandle { + + private UUID identifier; + + public OperationHandle() {} + + public OperationHandle(UUID identifier) { + this.identifier = identifier; + } + + public UUID getIdentifier() { + return identifier; + } + + public void setIdentifier(UUID identifier) { + this.identifier = identifier; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + OperationHandle that = (OperationHandle) o; + return Objects.equals(identifier, that.identifier); + } + + @Override + public int hashCode() { + return Objects.hash(identifier); + } + + @Override + public String toString() { + return ReflectionToStringBuilder.toString(this, ToStringStyle.JSON_STYLE); + } +} diff --git a/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/api/v1/dto/SessionData.java b/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/api/v1/dto/SessionData.java index bae6f39dabd..ae7dfdec984 100644 --- a/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/api/v1/dto/SessionData.java +++ b/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/api/v1/dto/SessionData.java @@ -31,6 +31,10 @@ public class SessionData { private Long createTime; private Long duration; private Long idleTime; + private String exception; + private String sessionType; + private String kyuubiInstance; + private String engineId; public SessionData() {} @@ -41,7 +45,11 @@ public SessionData( Map conf, Long createTime, Long duration, - Long idleTime) { + Long idleTime, + String exception, + String sessionType, + String kyuubiInstance, + String engineId) { this.identifier = identifier; this.user = user; this.ipAddr = ipAddr; @@ -49,6 +57,10 @@ public SessionData( this.createTime = createTime; this.duration = duration; this.idleTime = idleTime; + this.exception = exception; + this.sessionType = sessionType; + this.kyuubiInstance = kyuubiInstance; + this.engineId = engineId; } public String getIdentifier() { @@ -110,6 +122,38 @@ public void setIdleTime(Long idleTime) { this.idleTime = idleTime; } + public String getException() { + return exception; + } + + public void setException(String exception) { + this.exception = exception; + } + + public String getSessionType() { + return sessionType; + } + + public void setSessionType(String sessionType) { + this.sessionType = sessionType; + } + + public String getKyuubiInstance() { + return kyuubiInstance; + } + + public void setKyuubiInstance(String kyuubiInstance) { + this.kyuubiInstance = kyuubiInstance; + } + + public String getEngineId() { + return engineId; + } + + public void setEngineId(String engineId) { + this.engineId = engineId; + } + @Override public boolean equals(Object o) { if (this == o) return true; diff --git a/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/api/v1/dto/SessionOpenRequest.java b/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/api/v1/dto/SessionOpenRequest.java index 2d23aac5717..06eb29e9723 100644 --- a/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/api/v1/dto/SessionOpenRequest.java +++ b/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/api/v1/dto/SessionOpenRequest.java @@ -24,24 +24,14 @@ import org.apache.commons.lang3.builder.ToStringStyle; public class SessionOpenRequest { - private int protocolVersion; private Map configs; public SessionOpenRequest() {} - public SessionOpenRequest(int protocolVersion, Map configs) { - this.protocolVersion = protocolVersion; + public SessionOpenRequest(Map configs) { this.configs = configs; } - public int getProtocolVersion() { - return protocolVersion; - } - - public void setProtocolVersion(int protocolVersion) { - this.protocolVersion = protocolVersion; - } - public Map getConfigs() { if (null == configs) { return Collections.emptyMap(); @@ -58,13 +48,12 @@ public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; SessionOpenRequest that = (SessionOpenRequest) o; - return getProtocolVersion() == that.getProtocolVersion() - && Objects.equals(getConfigs(), that.getConfigs()); + return Objects.equals(getConfigs(), that.getConfigs()); } @Override public int hashCode() { - return Objects.hash(getProtocolVersion(), getConfigs()); + return Objects.hash(getConfigs()); } @Override diff --git a/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/api/v1/dto/StatementRequest.java b/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/api/v1/dto/StatementRequest.java index 436017f3c1e..f2dc060d5ec 100644 --- a/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/api/v1/dto/StatementRequest.java +++ b/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/api/v1/dto/StatementRequest.java @@ -17,6 +17,8 @@ package org.apache.kyuubi.client.api.v1.dto; +import java.util.Collections; +import java.util.Map; import java.util.Objects; import org.apache.commons.lang3.builder.ReflectionToStringBuilder; import org.apache.commons.lang3.builder.ToStringStyle; @@ -25,13 +27,20 @@ public class StatementRequest { private String statement; private boolean runAsync; private Long queryTimeout; + private Map confOverlay; public StatementRequest() {} public StatementRequest(String statement, boolean runAsync, Long queryTimeout) { + this(statement, runAsync, queryTimeout, Collections.emptyMap()); + } + + public StatementRequest( + String statement, boolean runAsync, Long queryTimeout, Map confOverlay) { this.statement = statement; this.runAsync = runAsync; this.queryTimeout = queryTimeout; + this.confOverlay = confOverlay; } public String getStatement() { @@ -58,6 +67,17 @@ public void setQueryTimeout(Long queryTimeout) { this.queryTimeout = queryTimeout; } + public Map getConfOverlay() { + if (confOverlay == null) { + return Collections.emptyMap(); + } + return confOverlay; + } + + public void setConfOverlay(Map confOverlay) { + this.confOverlay = confOverlay; + } + @Override public boolean equals(Object o) { if (this == o) return true; diff --git a/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/api/v1/dto/VersionInfo.java b/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/api/v1/dto/VersionInfo.java index 427272f4195..5749c4e32ad 100644 --- a/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/api/v1/dto/VersionInfo.java +++ b/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/api/v1/dto/VersionInfo.java @@ -17,6 +17,8 @@ package org.apache.kyuubi.client.api.v1.dto; +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; import java.util.Objects; import org.apache.commons.lang3.builder.ReflectionToStringBuilder; import org.apache.commons.lang3.builder.ToStringStyle; @@ -26,10 +28,13 @@ public class VersionInfo { public VersionInfo() {} - public VersionInfo(String version) { + // Explicitly specifies JsonProperty to be compatible if disable auto detect feature + @JsonCreator + public VersionInfo(@JsonProperty("version") String version) { this.version = version; } + @JsonProperty public String getVersion() { return version; } diff --git a/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/util/BatchUtils.java b/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/util/BatchUtils.java index 59f5967a0a6..f7efaad9dc3 100644 --- a/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/util/BatchUtils.java +++ b/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/util/BatchUtils.java @@ -20,6 +20,7 @@ import java.util.Arrays; import java.util.List; import java.util.Locale; +import org.apache.kyuubi.client.api.v1.dto.Batch; public final class BatchUtils { /** The batch has not been submitted to resource manager yet. */ @@ -40,6 +41,10 @@ public final class BatchUtils { public static List terminalBatchStates = Arrays.asList(FINISHED_STATE, ERROR_STATE, CANCELED_STATE); + public static String KYUUBI_BATCH_ID_KEY = "kyuubi.batch.id"; + + public static String KYUUBI_BATCH_DUPLICATED_KEY = "kyuubi.batch.duplicated"; + public static boolean isPendingState(String state) { return PENDING_STATE.equalsIgnoreCase(state); } @@ -55,4 +60,8 @@ public static boolean isFinishedState(String state) { public static boolean isTerminalState(String state) { return state != null && terminalBatchStates.contains(state.toUpperCase(Locale.ROOT)); } + + public static boolean isDuplicatedSubmission(Batch batch) { + return "true".equalsIgnoreCase(batch.getBatchInfo().get(KYUUBI_BATCH_DUPLICATED_KEY)); + } } diff --git a/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/util/JsonUtils.java b/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/util/JsonUtils.java index f42849166d3..855a152803f 100644 --- a/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/util/JsonUtils.java +++ b/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/util/JsonUtils.java @@ -17,12 +17,14 @@ package org.apache.kyuubi.client.util; +import com.fasterxml.jackson.databind.DeserializationFeature; import com.fasterxml.jackson.databind.ObjectMapper; import org.apache.kyuubi.client.exception.KyuubiRestException; public final class JsonUtils { - private static final ObjectMapper MAPPER = new ObjectMapper(); + private static final ObjectMapper MAPPER = + new ObjectMapper().configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); public static String toJson(Object object) { try { diff --git a/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/util/VersionUtils.java b/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/util/VersionUtils.java new file mode 100644 index 00000000000..bcabca5b9f8 --- /dev/null +++ b/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/util/VersionUtils.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.client.util; + +import java.util.Properties; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class VersionUtils { + static final Logger LOG = LoggerFactory.getLogger(VersionUtils.class); + + public static final String KYUUBI_CLIENT_VERSION_KEY = "kyuubi.client.version"; + private static String KYUUBI_CLIENT_VERSION; + + public static synchronized String getVersion() { + if (KYUUBI_CLIENT_VERSION == null) { + try { + Properties prop = new Properties(); + prop.load(VersionUtils.class.getClassLoader().getResourceAsStream("version.properties")); + KYUUBI_CLIENT_VERSION = prop.getProperty(KYUUBI_CLIENT_VERSION_KEY, "unknown"); + } catch (Exception e) { + LOG.error("Error getting kyuubi client version", e); + KYUUBI_CLIENT_VERSION = "unknown"; + } + } + return KYUUBI_CLIENT_VERSION; + } +} diff --git a/kyuubi-rest-client/src/main/resources/version.properties b/kyuubi-rest-client/src/main/resources/version.properties new file mode 100644 index 00000000000..82ae50cfbf6 --- /dev/null +++ b/kyuubi-rest-client/src/main/resources/version.properties @@ -0,0 +1,18 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +kyuubi.client.version = ${project.version} diff --git a/kyuubi-rest-client/src/test/java/org/apache/kyuubi/client/RestClientTestUtils.java b/kyuubi-rest-client/src/test/java/org/apache/kyuubi/client/RestClientTestUtils.java index 82413e2a40e..1ac0278bf08 100644 --- a/kyuubi-rest-client/src/test/java/org/apache/kyuubi/client/RestClientTestUtils.java +++ b/kyuubi-rest-client/src/test/java/org/apache/kyuubi/client/RestClientTestUtils.java @@ -45,35 +45,31 @@ public static CloseBatchResponse generateTestCloseBatchResp() { } public static Batch generateTestBatch(String id) { - Batch batch = - new Batch( - id, - TEST_USERNAME, - "spark", - "batch_name", - 0, - id, - null, - "RUNNING", - null, - "192.168.31.130:64573", - "RUNNING", - BATCH_CREATE_TIME, - 0); - - return batch; + return new Batch( + id, + TEST_USERNAME, + "spark", + "batch_name", + 0, + id, + null, + "RUNNING", + null, + "192.168.31.130:64573", + "RUNNING", + BATCH_CREATE_TIME, + 0, + Collections.emptyMap()); } public static BatchRequest generateTestBatchRequest() { - BatchRequest batchRequest = - new BatchRequest( - "spark", - "/MySpace/kyuubi-spark-sql-engine_2.12-1.6.0-SNAPSHOT.jar", - "org.apache.kyuubi.engine.spark.SparkSQLEngine", - "test_batch", - Collections.singletonMap("spark.driver.memory", "16m"), - Collections.emptyList()); - return batchRequest; + return new BatchRequest( + "spark", + "/MySpace/kyuubi-spark-sql-engine_2.12-1.6.0-SNAPSHOT.jar", + "org.apache.kyuubi.engine.spark.SparkSQLEngine", + "test_batch", + Collections.singletonMap("spark.driver.memory", "16m"), + Collections.emptyList()); } public static GetBatchesResponse generateTestBatchesResponse() { @@ -87,9 +83,8 @@ public static GetBatchesResponse generateTestBatchesResponse() { public static OperationLog generateTestOperationLog() { List logs = Arrays.asList( - "13:15:13.523 INFO org.apache.curator.framework.state." - + "ConnectionStateManager: State change: CONNECTED", - "13:15:13.528 INFO org.apache.kyuubi." + "engine.EngineRef: Launching engine:"); + "13:15:13.523 INFO ConnectionStateManager: State change: CONNECTED", + "13:15:13.528 INFO EngineRef: Launching engine:"); return new OperationLog(logs, 2); } } diff --git a/kyuubi-rest-client/src/test/java/org/apache/kyuubi/client/util/VersionUtilsTest.java b/kyuubi-rest-client/src/test/java/org/apache/kyuubi/client/util/VersionUtilsTest.java new file mode 100644 index 00000000000..d4675f34039 --- /dev/null +++ b/kyuubi-rest-client/src/test/java/org/apache/kyuubi/client/util/VersionUtilsTest.java @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.client.util; + +import java.util.regex.Pattern; +import org.junit.Test; + +public class VersionUtilsTest { + + @Test + public void testGetClientVersion() { + Pattern pattern = Pattern.compile("^\\d+\\.\\d+\\.\\d+.*"); + assert pattern.matcher(VersionUtils.getVersion()).matches(); + } +} diff --git a/kyuubi-server/pom.xml b/kyuubi-server/pom.xml index 748d8d02819..7408ac5dd00 100644 --- a/kyuubi-server/pom.xml +++ b/kyuubi-server/pom.xml @@ -21,7 +21,7 @@ org.apache.kyuubi kyuubi-parent - 1.7.0-SNAPSHOT + 1.8.0-SNAPSHOT kyuubi-server_2.12 @@ -36,6 +36,12 @@ ${project.version} + + org.apache.kyuubi + kyuubi-hive-jdbc + ${project.version} + + org.apache.kyuubi kyuubi-events_${scala.binary.version} @@ -92,6 +98,11 @@ kubernetes-client + + io.fabric8 + kubernetes-httpclient-okhttp + + org.apache.hive hive-metastore @@ -216,6 +227,21 @@ jersey-media-json-jackson + + org.glassfish.jersey.media + jersey-media-multipart + + + + com.fasterxml.jackson.core + jackson-databind + + + + com.fasterxml.jackson.datatype + jackson-datatype-jdk8 + + com.zaxxer HikariCP @@ -490,6 +516,12 @@ jetcd-launcher test + + + com.vladsch.flexmark + flexmark-all + test + @@ -522,6 +554,47 @@
              + + + com.github.eirslett + frontend-maven-plugin + + web-ui + + + + install node and pnpm + + install-node-and-pnpm + + + ${webui.skip} + + + + pnpm install + + pnpm + + generate-resources + + ${webui.skip} + install + + + + pnpm run build + + pnpm + + package + + ${webui.skip} + run build + + + +
              target/scala-${scala.binary.version}/classes target/scala-${scala.binary.version}/test-classes diff --git a/kyuubi-server/src/main/antlr4/org/apache/kyuubi/sql/KyuubiTrinoFeBaseLexer.g4 b/kyuubi-server/src/main/antlr4/org/apache/kyuubi/sql/KyuubiTrinoFeBaseLexer.g4 index 0b9543a430c..83810a073c7 100644 --- a/kyuubi-server/src/main/antlr4/org/apache/kyuubi/sql/KyuubiTrinoFeBaseLexer.g4 +++ b/kyuubi-server/src/main/antlr4/org/apache/kyuubi/sql/KyuubiTrinoFeBaseLexer.g4 @@ -43,6 +43,10 @@ FALSE: 'FALSE'; LIKE: 'LIKE'; IN: 'IN'; WHERE: 'WHERE'; +EXECUTE: 'EXECUTE'; +PREPARE: 'PREPARE'; +DEALLOCATE: 'DEALLOCATE'; +USING: 'USING'; ESCAPE: 'ESCAPE'; AUTO_INCREMENT: 'AUTO_INCREMENT'; @@ -97,6 +101,21 @@ SCOPE_TABLE: 'SCOPE_TABLE'; SOURCE_DATA_TYPE: 'SOURCE_DATA_TYPE'; IS_AUTOINCREMENT: 'IS_AUTOINCREMENT'; IS_GENERATEDCOLUMN: 'IS_GENERATEDCOLUMN'; +VARCHAR: 'VARCHAR'; +TINYINT: 'TINYINT'; +SMALLINT: 'SMALLINT'; +INTEGER: 'INTEGER'; +BIGINT: 'BIGINT'; +REAL: 'REAL'; +DOUBLE: 'DOUBLE'; +DECIMAL: 'DECIMAL'; +DATE: 'DATE'; +TIME: 'TIME'; +TIMESTAMP: 'TIMESTAMP'; +CAST: 'CAST'; +AS: 'AS'; +KEY_SEQ: 'KEY_SEQ'; +PK_NAME: 'PK_NAME'; fragment SEARCH_STRING_ESCAPE: '\'' '\\' '\''; @@ -108,6 +127,10 @@ STRING : '\'' ( ~'\'' | '\'\'' )* '\'' ; +STRING_MARK + : '\'' + ; + SIMPLE_COMMENT : '--' ~[\r\n]* '\r'? '\n'? -> channel(HIDDEN) ; @@ -119,6 +142,10 @@ BRACKETED_COMMENT WS : [ \r\n\t]+ -> channel(HIDDEN) ; +IDENTIFIER + : [A-Za-z_$0-9\u0080-\uFFFF]*?[A-Za-z_$\u0080-\uFFFF]+?[A-Za-z_$0-9\u0080-\uFFFF]* + ; + // Catch-all for anything we can't recognize. // We use this to be able to ignore and recover all the text // when splitting statements with DelimiterLexer diff --git a/kyuubi-server/src/main/antlr4/org/apache/kyuubi/sql/KyuubiTrinoFeBaseParser.g4 b/kyuubi-server/src/main/antlr4/org/apache/kyuubi/sql/KyuubiTrinoFeBaseParser.g4 index 590c4378d52..72811e59231 100644 --- a/kyuubi-server/src/main/antlr4/org/apache/kyuubi/sql/KyuubiTrinoFeBaseParser.g4 +++ b/kyuubi-server/src/main/antlr4/org/apache/kyuubi/sql/KyuubiTrinoFeBaseParser.g4 @@ -47,9 +47,27 @@ statement SOURCE_DATA_TYPE COMMA IS_AUTOINCREMENT COMMA IS_GENERATEDCOLUMN FROM SYSTEM_JDBC_COLUMNS (WHERE tableCatalogFilter? AND? tableSchemaFilter? AND? tableNameFilter? AND? colNameFilter?)? ORDER BY TABLE_CAT COMMA TABLE_SCHEM COMMA TABLE_NAME COMMA ORDINAL_POSITION #getColumns + | SELECT CAST LEFT_PAREN NULL AS VARCHAR RIGHT_PAREN TABLE_CAT COMMA + CAST LEFT_PAREN NULL AS VARCHAR RIGHT_PAREN TABLE_SCHEM COMMA + CAST LEFT_PAREN NULL AS VARCHAR RIGHT_PAREN TABLE_NAME COMMA + CAST LEFT_PAREN NULL AS VARCHAR RIGHT_PAREN COLUMN_NAME COMMA + CAST LEFT_PAREN NULL AS SMALLINT RIGHT_PAREN KEY_SEQ COMMA + CAST LEFT_PAREN NULL AS VARCHAR RIGHT_PAREN PK_NAME + WHERE FALSE #getPrimaryKeys + | EXECUTE IDENTIFIER (USING parameterList)? #execute + | PREPARE IDENTIFIER FROM statement #prepare + | DEALLOCATE PREPARE IDENTIFIER #deallocate | .*? #passThrough ; +anyStr + : ( ~',' )* + ; + +parameterList + : (TINYINT|SMALLINT|INTEGER|BIGINT|DOUBLE|REAL|DECIMAL|DATE|TIME|TIMESTAMP)? anyStr (',' (TINYINT|SMALLINT|INTEGER|BIGINT|DOUBLE|REAL|DECIMAL|DATE|TIME|TIMESTAMP)? anyStr)* + ; + tableCatalogFilter : (TABLE_CAT | TABLE_CATALOG) IS NULL #nullCatalog | (TABLE_CAT | TABLE_CATALOG) EQ catalog=STRING+ #catalogFilter diff --git a/kyuubi-server/src/main/resources/dist/index.html b/kyuubi-server/src/main/resources/dist/index.html new file mode 100644 index 00000000000..ab54fc14a6a --- /dev/null +++ b/kyuubi-server/src/main/resources/dist/index.html @@ -0,0 +1,28 @@ + + + + + + + Apache Kyuubi Dashboard + + +
              This is a dummy page for development.
              + + diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/client/KyuubiSyncThriftClient.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/client/KyuubiSyncThriftClient.scala index babe737456c..8b8561fa99f 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/client/KyuubiSyncThriftClient.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/client/KyuubiSyncThriftClient.scala @@ -17,6 +17,7 @@ package org.apache.kyuubi.client +import java.util.UUID import java.util.concurrent.{ExecutorService, ScheduledExecutorService, TimeUnit} import java.util.concurrent.locks.ReentrantLock @@ -53,8 +54,12 @@ class KyuubiSyncThriftClient private ( private val lock = new ReentrantLock() + // Visible for testing. + private[kyuubi] def remoteSessionHandle: TSessionHandle = _remoteSessionHandle + @volatile private var _aliveProbeSessionHandle: TSessionHandle = _ @volatile private var remoteEngineBroken: Boolean = false + @volatile private var clientClosedOnEngineBroken: Boolean = false private val engineAliveProbeClient = engineAliveProbeProtocol.map(new TCLIService.Client(_)) private var engineAliveThreadPool: ScheduledExecutorService = _ @volatile private var engineLastAlive: Long = _ @@ -105,6 +110,18 @@ class KyuubiSyncThriftClient private ( } } else { shutdownAsyncRequestExecutor() + warn(s"Removing Clients for ${_remoteSessionHandle}") + Seq(protocol).union(engineAliveProbeProtocol.toSeq).foreach { tProtocol => + Utils.tryLogNonFatalError { + if (tProtocol.getTransport.isOpen) { + tProtocol.getTransport.close() + } + } + clientClosedOnEngineBroken = true + Option(engineAliveThreadPool).foreach { pool => + ThreadUtils.shutdown(pool, Duration(engineAliveProbeInterval, TimeUnit.MILLISECONDS)) + } + } } } } @@ -180,7 +197,10 @@ class KyuubiSyncThriftClient private ( engineAliveProbeClient.foreach { aliveProbeClient => val sessionName = SessionHandle.apply(_remoteSessionHandle).identifier + "_aliveness_probe" Utils.tryLogNonFatalError { - req.setConfiguration((configs ++ Map(KyuubiConf.SESSION_NAME.key -> sessionName)).asJava) + req.setConfiguration((configs ++ Map( + KyuubiConf.SESSION_NAME.key -> sessionName, + KYUUBI_SESSION_HANDLE_KEY -> UUID.randomUUID().toString, + KyuubiConf.ENGINE_SESSION_INITIALIZE_SQL.key -> "")).asJava) val resp = aliveProbeClient.OpenSession(req) ThriftUtils.verifyTStatus(resp.getStatus) _aliveProbeSessionHandle = resp.getSessionHandle @@ -192,6 +212,7 @@ class KyuubiSyncThriftClient private ( } def closeSession(): Unit = { + if (clientClosedOnEngineBroken) return try { if (_remoteSessionHandle != null) { val req = new TCloseSessionReq(_remoteSessionHandle) diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/ApplicationOperation.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/ApplicationOperation.scala index 93d495895ad..a2b3d0f7616 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/ApplicationOperation.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/ApplicationOperation.scala @@ -56,9 +56,10 @@ trait ApplicationOperation { * Get the engine/application status by the unique application tag * * @param tag the unique application tag for engine instance. + * @param submitTime engine submit to resourceManager time * @return [[ApplicationInfo]] */ - def getApplicationInfoByTag(tag: String): ApplicationInfo + def getApplicationInfoByTag(tag: String, submitTime: Option[Long] = None): ApplicationInfo } object ApplicationState extends Enumeration { @@ -99,6 +100,11 @@ case class ApplicationInfo( } } +object ApplicationInfo { + val NOT_FOUND: ApplicationInfo = ApplicationInfo(null, null, ApplicationState.NOT_FOUND) + val UNKNOWN: ApplicationInfo = ApplicationInfo(null, null, ApplicationState.UNKNOWN) +} + object ApplicationOperation { val NOT_FOUND = "APPLICATION_NOT_FOUND" } diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/EngineRef.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/EngineRef.scala index 565f41ff295..63b37f1c5d8 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/EngineRef.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/EngineRef.scala @@ -19,6 +19,7 @@ package org.apache.kyuubi.engine import java.util.concurrent.TimeUnit +import scala.collection.JavaConverters._ import scala.util.Random import com.codahale.metrics.MetricRegistry @@ -28,8 +29,9 @@ import org.apache.kyuubi.{KYUUBI_VERSION, KyuubiSQLException, Logging, Utils} import org.apache.kyuubi.config.KyuubiConf import org.apache.kyuubi.config.KyuubiConf._ import org.apache.kyuubi.config.KyuubiReservedKeys.KYUUBI_ENGINE_SUBMIT_TIME_KEY -import org.apache.kyuubi.engine.EngineType.{EngineType, FLINK_SQL, HIVE_SQL, JDBC, SPARK_SQL, TRINO} +import org.apache.kyuubi.engine.EngineType._ import org.apache.kyuubi.engine.ShareLevel.{CONNECTION, GROUP, SERVER, ShareLevel} +import org.apache.kyuubi.engine.chat.ChatProcessBuilder import org.apache.kyuubi.engine.flink.FlinkProcessBuilder import org.apache.kyuubi.engine.hive.HiveProcessBuilder import org.apache.kyuubi.engine.jdbc.JdbcProcessBuilder @@ -40,6 +42,7 @@ import org.apache.kyuubi.ha.client.{DiscoveryClient, DiscoveryClientProvider, Di import org.apache.kyuubi.metrics.MetricsConstants.{ENGINE_FAIL, ENGINE_TIMEOUT, ENGINE_TOTAL} import org.apache.kyuubi.metrics.MetricsSystem import org.apache.kyuubi.operation.log.OperationLog +import org.apache.kyuubi.plugin.GroupProvider /** * The description and functionality of an engine at server side @@ -51,7 +54,7 @@ import org.apache.kyuubi.operation.log.OperationLog private[kyuubi] class EngineRef( conf: KyuubiConf, user: String, - primaryGroup: String, + groupProvider: GroupProvider, engineRefId: String, engineManager: KyuubiApplicationManager) extends Logging { @@ -74,7 +77,7 @@ private[kyuubi] class EngineRef( private val enginePoolIgnoreSubdomain: Boolean = conf.get(ENGINE_POOL_IGNORE_SUBDOMAIN) - private val enginePoolBalancePolicy: String = conf.get(ENGINE_POOL_BALANCE_POLICY) + private val enginePoolSelectPolicy: String = conf.get(ENGINE_POOL_SELECT_POLICY) // In case the multi kyuubi instances have the small gap of timeout, here we add // a small amount of time for timeout @@ -82,10 +85,12 @@ private[kyuubi] class EngineRef( private var builder: ProcBuilder = _ + private[kyuubi] def getEngineRefId(): String = engineRefId + // Launcher of the engine private[kyuubi] val appUser: String = shareLevel match { case SERVER => Utils.currentUser - case GROUP => primaryGroup + case GROUP => groupProvider.primaryGroup(user, conf.getAll.asJava) case _ => user } @@ -97,12 +102,11 @@ private[kyuubi] class EngineRef( warn(s"Request engine pool size($clientPoolSize) exceeds, fallback to " + s"system threshold $poolThreshold") } - val seqNum = enginePoolBalancePolicy match { + val seqNum = enginePoolSelectPolicy match { case "POLLING" => val snPath = DiscoveryPaths.makePath( - s"${serverSpace}_${KYUUBI_VERSION}_${shareLevel}_$engineType", - "seq_num", + s"${serverSpace}_${KYUUBI_VERSION}_${shareLevel}_${engineType}_seqNum", appUser, clientPoolName) DiscoveryClientProvider.withDiscoveryClient(conf) { client => @@ -159,8 +163,7 @@ private[kyuubi] class EngineRef( case _ => val lockPath = DiscoveryPaths.makePath( - s"${serverSpace}_${shareLevel}_$engineType", - "lock", + s"${serverSpace}_${KYUUBI_VERSION}_${shareLevel}_${engineType}_lock", appUser, subdomain) discoveryClient.tryWithLock( @@ -192,6 +195,8 @@ private[kyuubi] class EngineRef( new HiveProcessBuilder(appUser, conf, engineRefId, extraEngineLog) case JDBC => new JdbcProcessBuilder(appUser, conf, engineRefId, extraEngineLog) + case CHAT => + new ChatProcessBuilder(appUser, conf, engineRefId, extraEngineLog) } MetricsSystem.tracing(_.incCount(ENGINE_TOTAL)) @@ -218,7 +223,10 @@ private[kyuubi] class EngineRef( // check the engine application state from engine manager and fast fail on engine terminate if (exitValue == Some(0)) { Option(engineManager).foreach { engineMgr => - engineMgr.getApplicationInfo(builder.clusterManager(), engineRefId).foreach { appInfo => + engineMgr.getApplicationInfo( + builder.clusterManager(), + engineRefId, + Some(started)).foreach { appInfo => if (ApplicationState.isTerminated(appInfo.state)) { MetricsSystem.tracing { ms => ms.incCount(MetricRegistry.name(ENGINE_FAIL, appUser)) diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/JpsApplicationOperation.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/JpsApplicationOperation.scala index bd482b86bf5..ce2e054617a 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/JpsApplicationOperation.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/JpsApplicationOperation.scala @@ -84,7 +84,7 @@ class JpsApplicationOperation extends ApplicationOperation { killJpsApplicationByTag(tag, true) } - override def getApplicationInfoByTag(tag: String): ApplicationInfo = { + override def getApplicationInfoByTag(tag: String, submitTime: Option[Long]): ApplicationInfo = { val commandOption = getEngine(tag) if (commandOption.nonEmpty) { val idAndCmd = commandOption.get diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/KubernetesApplicationOperation.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/KubernetesApplicationOperation.scala index bee69b11762..83792f52f79 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/KubernetesApplicationOperation.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/KubernetesApplicationOperation.scala @@ -17,30 +17,54 @@ package org.apache.kyuubi.engine -import io.fabric8.kubernetes.api.model.{Pod, PodList} +import java.util.concurrent.{ConcurrentHashMap, TimeUnit} + +import com.google.common.cache.{Cache, CacheBuilder, RemovalNotification} +import io.fabric8.kubernetes.api.model.Pod import io.fabric8.kubernetes.client.KubernetesClient -import io.fabric8.kubernetes.client.dsl.FilterWatchListDeletable +import io.fabric8.kubernetes.client.informers.{ResourceEventHandler, SharedIndexInformer} -import org.apache.kyuubi.Logging +import org.apache.kyuubi.{Logging, Utils} import org.apache.kyuubi.config.KyuubiConf -import org.apache.kyuubi.engine.ApplicationState.{ApplicationState, FAILED, FINISHED, PENDING, RUNNING, UNKNOWN} -import org.apache.kyuubi.engine.KubernetesApplicationOperation.{toApplicationState, SPARK_APP_ID_LABEL} +import org.apache.kyuubi.engine.ApplicationState.{isTerminated, ApplicationState, FAILED, FINISHED, NOT_FOUND, PENDING, RUNNING, UNKNOWN} +import org.apache.kyuubi.engine.KubernetesApplicationOperation.{toApplicationState, LABEL_KYUUBI_UNIQUE_KEY, SPARK_APP_ID_LABEL} import org.apache.kyuubi.util.KubernetesUtils class KubernetesApplicationOperation extends ApplicationOperation with Logging { @volatile private var kubernetesClient: KubernetesClient = _ - private var jpsOperation: JpsApplicationOperation = _ + private var enginePodInformer: SharedIndexInformer[Pod] = _ + private var submitTimeout: Long = _ - override def initialize(conf: KyuubiConf): Unit = { - jpsOperation = new JpsApplicationOperation - jpsOperation.initialize(conf) + // key is kyuubi_unique_key + private val appInfoStore: ConcurrentHashMap[String, ApplicationInfo] = + new ConcurrentHashMap[String, ApplicationInfo] + // key is kyuubi_unique_key + private var cleanupTerminatedAppInfoTrigger: Cache[String, ApplicationState] = _ + override def initialize(conf: KyuubiConf): Unit = { info("Start initializing Kubernetes Client.") kubernetesClient = KubernetesUtils.buildKubernetesClient(conf) match { case Some(client) => info(s"Initialized Kubernetes Client connect to: ${client.getMasterUrl}") + submitTimeout = conf.get(KyuubiConf.ENGINE_SUBMIT_TIMEOUT) + // Disable resync, see https://github.com/fabric8io/kubernetes-client/discussions/5015 + enginePodInformer = client.pods() + .withLabel(LABEL_KYUUBI_UNIQUE_KEY) + .inform(new SparkEnginePodEventHandler) + info("Start Kubernetes Client Informer.") + // Defer cleaning terminated application information + val retainPeriod = conf.get(KyuubiConf.KUBERNETES_TERMINATED_APPLICATION_RETAIN_PERIOD) + cleanupTerminatedAppInfoTrigger = CacheBuilder.newBuilder() + .expireAfterWrite(retainPeriod, TimeUnit.MILLISECONDS) + .removalListener((notification: RemovalNotification[String, ApplicationState]) => { + Option(appInfoStore.remove(notification.getKey)).foreach { removed => + info(s"Remove terminated application ${removed.id} with " + + s"tag ${notification.getKey} and state ${removed.state}") + } + }) + .build() client case None => warn("Fail to init Kubernetes Client for Kubernetes Application Operation") @@ -49,89 +73,136 @@ class KubernetesApplicationOperation extends ApplicationOperation with Logging { } override def isSupported(clusterManager: Option[String]): Boolean = { + // TODO add deploy mode to check whether is supported kubernetesClient != null && clusterManager.nonEmpty && clusterManager.get.toLowerCase.startsWith("k8s") } override def killApplicationByTag(tag: String): KillResponse = { - if (kubernetesClient != null) { - debug(s"Deleting application info from Kubernetes cluster by $tag tag") - try { - // Need driver only - val operation = findDriverPodByTag(tag) - val podList = operation.list().getItems - if (podList.size() != 0) { - toApplicationState(podList.get(0).getStatus.getPhase) match { - case FAILED | UNKNOWN => - ( - false, - s"Target Pod ${podList.get(0).getMetadata.getName} is in FAILED or UNKNOWN status") - case _ => - ( - operation.delete(), - s"Operation of deleted appId: ${podList.get(0).getMetadata.getName} is completed") - } - } else { - // client mode - jpsOperation.killApplicationByTag(tag) - } - } catch { - case e: Exception => - (false, s"Failed to terminate application with $tag, due to ${e.getMessage}") - } - } else { + if (kubernetesClient == null) { throw new IllegalStateException("Methods initialize and isSupported must be called ahead") } - } - - override def getApplicationInfoByTag(tag: String): ApplicationInfo = { - if (kubernetesClient != null) { - debug(s"Getting application info from Kubernetes cluster by $tag tag") - try { - val operation = findDriverPodByTag(tag) - val podList = operation.list().getItems - if (podList.size() != 0) { - val pod = podList.get(0) - val info = ApplicationInfo( - // spark pods always tag label `spark-app-selector:` - id = pod.getMetadata.getLabels.get(SPARK_APP_ID_LABEL), - name = pod.getMetadata.getName, - state = KubernetesApplicationOperation.toApplicationState(pod.getStatus.getPhase), - error = Option(pod.getStatus.getReason)) - debug(s"Successfully got application info by $tag: $info") - info - } else { - // client mode - jpsOperation.getApplicationInfoByTag(tag) - } - } catch { - case e: Exception => - error(s"Failed to get application with $tag, due to ${e.getMessage}") - ApplicationInfo(id = null, name = null, ApplicationState.NOT_FOUND) + debug(s"Deleting application info from Kubernetes cluster by $tag tag") + try { + val info = appInfoStore.getOrDefault(tag, ApplicationInfo.NOT_FOUND) + debug(s"Application info[tag: $tag] is in ${info.state}") + info.state match { + case NOT_FOUND | FAILED | UNKNOWN => + ( + false, + s"Target application[tag: $tag] is in ${info.state} status") + case _ => + ( + !kubernetesClient.pods.withName(info.name).delete().isEmpty, + s"Operation of deleted application[appId: ${info.id} ,tag: $tag] is completed") } - } else { - throw new IllegalStateException("Methods initialize and isSupported must be called ahead") + } catch { + case e: Exception => + (false, s"Failed to terminate application with $tag, due to ${e.getMessage}") } } - private def findDriverPodByTag(tag: String): FilterWatchListDeletable[Pod, PodList] = { - val operation = kubernetesClient.pods() - .withLabel(KubernetesApplicationOperation.LABEL_KYUUBI_UNIQUE_KEY, tag) - val size = operation.list().getItems.size() - if (size != 1) { - warn(s"Get Tag: ${tag} Driver Pod In Kubernetes size: ${size}, we expect 1") + override def getApplicationInfoByTag(tag: String, submitTime: Option[Long]): ApplicationInfo = { + if (kubernetesClient == null) { + throw new IllegalStateException("Methods initialize and isSupported must be called ahead") + } + debug(s"Getting application info from Kubernetes cluster by $tag tag") + try { + val appInfo = appInfoStore.getOrDefault(tag, ApplicationInfo.NOT_FOUND) + (appInfo.state, submitTime) match { + // Kyuubi should wait second if pod is not be created + case (NOT_FOUND, Some(_submitTime)) => + val elapsedTime = System.currentTimeMillis - _submitTime + if (elapsedTime > submitTimeout) { + error(s"Can't find target driver pod by tag: $tag, " + + s"elapsed time: ${elapsedTime}ms exceeds ${submitTimeout}ms.") + ApplicationInfo.NOT_FOUND + } else { + warn("Wait for driver pod to be created, " + + s"elapsed time: ${elapsedTime}ms, return UNKNOWN status") + ApplicationInfo.UNKNOWN + } + case (NOT_FOUND, None) => + ApplicationInfo.NOT_FOUND + case _ => + debug(s"Successfully got application info by $tag: $appInfo") + appInfo + } + } catch { + case e: Exception => + error(s"Failed to get application with $tag, due to ${e.getMessage}") + ApplicationInfo.NOT_FOUND } - operation } override def stop(): Unit = { - if (kubernetesClient != null) { - try { + Utils.tryLogNonFatalError { + if (enginePodInformer != null) { + enginePodInformer.stop() + enginePodInformer = null + } + } + + Utils.tryLogNonFatalError { + if (kubernetesClient != null) { kubernetesClient.close() - } catch { - case e: Exception => error(e.getMessage) + kubernetesClient = null } } + + if (cleanupTerminatedAppInfoTrigger != null) { + cleanupTerminatedAppInfoTrigger.cleanUp() + cleanupTerminatedAppInfoTrigger = null + } + } + + private class SparkEnginePodEventHandler extends ResourceEventHandler[Pod] { + + override def onAdd(pod: Pod): Unit = { + if (isSparkEnginePod(pod)) { + updateApplicationState(pod) + } + } + + override def onUpdate(oldPod: Pod, newPod: Pod): Unit = { + if (isSparkEnginePod(newPod)) { + updateApplicationState(newPod) + val appState = toApplicationState(newPod.getStatus.getPhase) + if (isTerminated(appState)) { + markApplicationTerminated(newPod) + } + } + } + + override def onDelete(pod: Pod, deletedFinalStateUnknown: Boolean): Unit = { + if (isSparkEnginePod(pod)) { + updateApplicationState(pod) + markApplicationTerminated(pod) + } + } + } + + private def isSparkEnginePod(pod: Pod): Boolean = { + val labels = pod.getMetadata.getLabels + labels.containsKey(LABEL_KYUUBI_UNIQUE_KEY) && labels.containsKey(SPARK_APP_ID_LABEL) + } + + private def updateApplicationState(pod: Pod): Unit = { + val appState = toApplicationState(pod.getStatus.getPhase) + debug(s"Driver Informer changes pod: ${pod.getMetadata.getName} to state: $appState") + appInfoStore.put( + pod.getMetadata.getLabels.get(LABEL_KYUUBI_UNIQUE_KEY), + ApplicationInfo( + id = pod.getMetadata.getLabels.get(SPARK_APP_ID_LABEL), + name = pod.getMetadata.getName, + state = appState, + error = Option(pod.getStatus.getReason))) + } + + private def markApplicationTerminated(pod: Pod): Unit = { + cleanupTerminatedAppInfoTrigger.put( + pod.getMetadata.getLabels.get(LABEL_KYUUBI_UNIQUE_KEY), + toApplicationState(pod.getStatus.getPhase)) } } @@ -148,10 +219,10 @@ object KubernetesApplicationOperation extends Logging { case "Running" => RUNNING case "Succeeded" => FINISHED case "Failed" | "Error" => FAILED - case "Unknown" => ApplicationState.UNKNOWN + case "Unknown" => UNKNOWN case _ => warn(s"The kubernetes driver pod state: $state is not supported, " + "mark the application state as UNKNOWN.") - ApplicationState.UNKNOWN + UNKNOWN } } diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/KyuubiApplicationManager.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/KyuubiApplicationManager.scala index 481d7a2f17c..9b23e550d07 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/KyuubiApplicationManager.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/KyuubiApplicationManager.scala @@ -19,6 +19,7 @@ package org.apache.kyuubi.engine import java.io.File import java.net.{URI, URISyntaxException} +import java.nio.file.{Files, Path} import java.util.{Locale, ServiceLoader} import scala.collection.JavaConverters._ @@ -83,10 +84,11 @@ class KyuubiApplicationManager extends AbstractService("KyuubiApplicationManager def getApplicationInfo( clusterManager: Option[String], - tag: String): Option[ApplicationInfo] = { + tag: String, + submitTime: Option[Long] = None): Option[ApplicationInfo] = { val operation = operations.find(_.isSupported(clusterManager)) operation match { - case Some(op) => Some(op.getApplicationInfoByTag(tag)) + case Some(op) => Some(op.getApplicationInfoByTag(tag, submitTime)) case None => None } } @@ -109,6 +111,15 @@ object KyuubiApplicationManager { conf.set(FlinkProcessBuilder.TAG_KEY, newTag) } + val uploadWorkDir: Path = { + val path = Utils.getAbsolutePathFromWork("upload") + val pathFile = path.toFile + if (!pathFile.exists()) { + Files.createDirectories(path) + } + path + } + private[kyuubi] def checkApplicationAccessPath(path: String, conf: KyuubiConf): Unit = { val localDirAllowList = conf.get(KyuubiConf.SESSION_LOCAL_DIR_ALLOW_LIST) if (localDirAllowList.nonEmpty) { diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/ProcBuilder.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/ProcBuilder.scala index 5b69b02f54d..4c7330b4dd5 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/ProcBuilder.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/ProcBuilder.scala @@ -118,7 +118,7 @@ trait ProcBuilder { env.get("KYUUBI_WORK_DIR_ROOT").map { root => val workingRoot = Paths.get(root).toAbsolutePath if (!Files.exists(workingRoot)) { - debug(s"Creating KYUUBI_WORK_DIR_ROOT at $workingRoot") + info(s"Creating KYUUBI_WORK_DIR_ROOT at $workingRoot") Files.createDirectories(workingRoot) } if (Files.isDirectory(workingRoot)) { @@ -127,7 +127,7 @@ trait ProcBuilder { }.map { rootAbs => val working = Paths.get(rootAbs, proxyUser) if (!Files.exists(working)) { - debug(s"Creating $proxyUser's working directory at $working") + info(s"Creating $proxyUser's working directory at $working") Files.createDirectories(working) } if (Files.isDirectory(working)) { @@ -335,7 +335,7 @@ trait ProcBuilder { protected def validateEnv(requiredEnv: String): Throwable = { KyuubiSQLException(s"$requiredEnv is not set! For more information on installing and " + - s"configuring $requiredEnv, please visit https://kyuubi.apache.org/docs/latest/" + + s"configuring $requiredEnv, please visit https://kyuubi.readthedocs.io/en/master/" + s"deployment/settings.html#environments") } diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/YarnApplicationOperation.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/YarnApplicationOperation.scala index b38b1daa222..e836e65da99 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/YarnApplicationOperation.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/YarnApplicationOperation.scala @@ -75,7 +75,7 @@ class YarnApplicationOperation extends ApplicationOperation with Logging { } } - override def getApplicationInfoByTag(tag: String): ApplicationInfo = { + override def getApplicationInfoByTag(tag: String, submitTime: Option[Long]): ApplicationInfo = { if (yarnClient != null) { debug(s"Getting application info from Yarn cluster by $tag tag") val reports = yarnClient.getApplications(null, null, Set(tag).asJava) diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/chat/ChatProcessBuilder.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/chat/ChatProcessBuilder.scala new file mode 100644 index 00000000000..3e4a20de373 --- /dev/null +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/chat/ChatProcessBuilder.scala @@ -0,0 +1,116 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.engine.chat + +import java.io.File +import java.nio.file.{Files, Paths} +import java.util + +import scala.collection.JavaConverters._ +import scala.collection.mutable.ArrayBuffer + +import com.google.common.annotations.VisibleForTesting + +import org.apache.kyuubi.{Logging, SCALA_COMPILE_VERSION, Utils} +import org.apache.kyuubi.Utils.REDACTION_REPLACEMENT_TEXT +import org.apache.kyuubi.config.KyuubiConf +import org.apache.kyuubi.config.KyuubiConf._ +import org.apache.kyuubi.config.KyuubiReservedKeys.KYUUBI_SESSION_USER_KEY +import org.apache.kyuubi.engine.ProcBuilder +import org.apache.kyuubi.operation.log.OperationLog + +class ChatProcessBuilder( + override val proxyUser: String, + override val conf: KyuubiConf, + val engineRefId: String, + val extraEngineLog: Option[OperationLog] = None) + extends ProcBuilder with Logging { + + @VisibleForTesting + def this(proxyUser: String, conf: KyuubiConf) { + this(proxyUser, conf, "") + } + + /** + * The short name of the engine process builder, we use this for form the engine jar paths now + * see `mainResource` + */ + override def shortName: String = "chat" + + override protected def module: String = "kyuubi-chat-engine" + + /** + * The class containing the main method + */ + override protected def mainClass: String = "org.apache.kyuubi.engine.chat.ChatEngine" + + override protected val commands: Array[String] = { + val buffer = new ArrayBuffer[String]() + buffer += executable + + val memory = conf.get(ENGINE_CHAT_MEMORY) + buffer += s"-Xmx$memory" + + val javaOptions = conf.get(ENGINE_CHAT_JAVA_OPTIONS) + javaOptions.foreach(buffer += _) + + buffer += "-cp" + val classpathEntries = new util.LinkedHashSet[String] + mainResource.foreach(classpathEntries.add) + mainResource.foreach { path => + val parent = Paths.get(path).getParent + val chatDevDepDir = parent + .resolve(s"scala-$SCALA_COMPILE_VERSION") + .resolve("jars") + if (Files.exists(chatDevDepDir)) { + // add dev classpath + classpathEntries.add(s"$chatDevDepDir${File.separator}*") + } else { + // add prod classpath + classpathEntries.add(s"$parent${File.separator}*") + } + } + + val extraCp = conf.get(ENGINE_CHAT_EXTRA_CLASSPATH) + extraCp.foreach(classpathEntries.add) + buffer += classpathEntries.asScala.mkString(File.pathSeparator) + buffer += mainClass + + buffer += "--conf" + buffer += s"$KYUUBI_SESSION_USER_KEY=$proxyUser" + + conf.getAll.foreach { case (k, v) => + buffer += "--conf" + buffer += s"$k=$v" + } + buffer.toArray + } + + override def toString: String = { + if (commands == null) { + super.toString() + } else { + Utils.redactCommandLineArgs(conf, commands).map { + case arg if arg.startsWith("-") || arg == mainClass => s"\\\n\t$arg" + case arg if arg.contains(ENGINE_CHAT_GPT_API_KEY.key) => + s"${ENGINE_CHAT_GPT_API_KEY.key}=$REDACTION_REPLACEMENT_TEXT" + case arg => arg + }.mkString(" ") + } + } +} diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/spark/SparkBatchProcessBuilder.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/spark/SparkBatchProcessBuilder.scala index 98f9ea5a335..4a613278dcb 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/spark/SparkBatchProcessBuilder.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/spark/SparkBatchProcessBuilder.scala @@ -56,9 +56,7 @@ class SparkBatchProcessBuilder( buffer += s"${convertConfigKey(k)}=$v" } - setSparkUserName(proxyUser, buffer) - buffer += PROXY_USER - buffer += proxyUser + setupKerberos(buffer) assert(mainResource.isDefined) buffer += mainResource.get @@ -77,6 +75,6 @@ class SparkBatchProcessBuilder( override protected def module: String = "kyuubi-spark-batch-submit" override def clusterManager(): Option[String] = { - batchConf.get(MASTER_KEY).orElse(defaultMaster) + batchConf.get(MASTER_KEY).orElse(super.clusterManager()) } } diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/spark/SparkProcessBuilder.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/spark/SparkProcessBuilder.scala index 874a36c0016..b74eab77d05 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/spark/SparkProcessBuilder.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/spark/SparkProcessBuilder.scala @@ -121,6 +121,16 @@ class SparkProcessBuilder( buffer += s"${convertConfigKey(k)}=$v" } + setupKerberos(buffer) + + mainResource.foreach { r => buffer += r } + + buffer.toArray + } + + override protected def module: String = "kyuubi-spark-sql-engine" + + protected def setupKerberos(buffer: ArrayBuffer[String]): Unit = { // if the keytab is specified, PROXY_USER is not supported tryKeytab() match { case None => @@ -130,14 +140,8 @@ class SparkProcessBuilder( case Some(name) => setSparkUserName(name, buffer) } - - mainResource.foreach { r => buffer += r } - - buffer.toArray } - override protected def module: String = "kyuubi-spark-sql-engine" - private def tryKeytab(): Option[String] = { val principal = conf.getOption(PRINCIPAL) val keytab = conf.getOption(KEYTAB) diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/events/KyuubiOperationEvent.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/events/KyuubiOperationEvent.scala index 74a3a3fad39..7147cb42450 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/events/KyuubiOperationEvent.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/events/KyuubiOperationEvent.scala @@ -42,6 +42,7 @@ import org.apache.kyuubi.session.KyuubiSession * @param sessionId the identifier of the parent session * @param sessionUser the authenticated client user * @param sessionType the type of the parent session + * @param kyuubiInstance the parent session connection url */ case class KyuubiOperationEvent private ( statementId: String, @@ -56,7 +57,8 @@ case class KyuubiOperationEvent private ( exception: Option[Throwable], sessionId: String, sessionUser: String, - sessionType: String) extends KyuubiEvent { + sessionType: String, + kyuubiInstance: String) extends KyuubiEvent { // operation events are partitioned by the date when the corresponding operations are // created. @@ -85,6 +87,7 @@ object KyuubiOperationEvent { status.exception, session.handle.identifier.toString, session.user, - session.sessionType.toString) + session.sessionType.toString, + session.connectionUrl) } } diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/operation/BatchJobSubmission.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/operation/BatchJobSubmission.scala index e99b3292c36..3cbb16907bc 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/operation/BatchJobSubmission.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/operation/BatchJobSubmission.scala @@ -18,6 +18,7 @@ package org.apache.kyuubi.operation import java.io.IOException +import java.nio.file.{Files, Paths} import java.util.Locale import java.util.concurrent.TimeUnit @@ -32,7 +33,7 @@ import org.apache.kyuubi.engine.spark.SparkBatchProcessBuilder import org.apache.kyuubi.metrics.MetricsConstants.OPERATION_OPEN import org.apache.kyuubi.metrics.MetricsSystem import org.apache.kyuubi.operation.FetchOrientation.FetchOrientation -import org.apache.kyuubi.operation.OperationState.{CANCELED, OperationState, RUNNING} +import org.apache.kyuubi.operation.OperationState.{isTerminal, CANCELED, OperationState, RUNNING} import org.apache.kyuubi.operation.log.OperationLog import org.apache.kyuubi.server.metadata.api.Metadata import org.apache.kyuubi.session.KyuubiBatchSessionImpl @@ -69,7 +70,11 @@ class BatchJobSubmission( private[kyuubi] val batchId: String = session.handle.identifier.toString - private var applicationInfo: Option[ApplicationInfo] = None + @volatile private var _applicationInfo: Option[ApplicationInfo] = None + def getOrFetchCurrentApplicationInfo: Option[ApplicationInfo] = _applicationInfo match { + case Some(_) => _applicationInfo + case None => currentApplicationInfo + } private var killMessage: KillResponse = (false, "UNKNOWN") def getKillMessage: KillResponse = killMessage @@ -97,10 +102,19 @@ class BatchJobSubmission( } } - override private[kyuubi] def currentApplicationInfo: Option[ApplicationInfo] = { + override protected def currentApplicationInfo: Option[ApplicationInfo] = { + if (isTerminal(state) && _applicationInfo.nonEmpty) return _applicationInfo // only the ApplicationInfo with non-empty id is valid for the operation + val submitTime = if (_appStartTime <= 0) { + System.currentTimeMillis() + } else { + _appStartTime + } val applicationInfo = - applicationManager.getApplicationInfo(builder.clusterManager(), batchId).filter(_.id != null) + applicationManager.getApplicationInfo( + builder.clusterManager(), + batchId, + Some(submitTime)).filter(_.id != null) applicationInfo.foreach { _ => if (_appStartTime <= 0) { _appStartTime = System.currentTimeMillis() @@ -127,13 +141,13 @@ class BatchJobSubmission( } if (isTerminalState(state)) { - if (applicationInfo.isEmpty) { - applicationInfo = + if (_applicationInfo.isEmpty) { + _applicationInfo = Option(ApplicationInfo(id = null, name = null, state = ApplicationState.NOT_FOUND)) } } - applicationInfo.foreach { status => + _applicationInfo.foreach { status => val metadataToUpdate = Metadata( identifier = batchId, state = state.toString, @@ -154,7 +168,7 @@ class BatchJobSubmission( private def setStateIfNotCanceled(newState: OperationState): Unit = state.synchronized { if (state != CANCELED) { setState(newState) - applicationInfo.filter(_.id != null).foreach { ai => + _applicationInfo.filter(_.id != null).foreach { ai => session.getSessionEvent.foreach(_.engineId = ai.id) } if (newState == RUNNING) { @@ -184,8 +198,8 @@ class BatchJobSubmission( // submitted batch application. recoveryMetadata.map { metadata => if (metadata.state == OperationState.PENDING.toString) { - applicationInfo = currentApplicationInfo - applicationInfo.map(_.id) match { + _applicationInfo = currentApplicationInfo + _applicationInfo.map(_.id) match { case Some(null) => submitAndMonitorBatchJob() case Some(appId) => @@ -226,10 +240,10 @@ class BatchJobSubmission( try { info(s"Submitting $batchType batch[$batchId] job:\n$builder") val process = builder.start - applicationInfo = currentApplicationInfo - while (!applicationFailed(applicationInfo) && process.isAlive) { + _applicationInfo = currentApplicationInfo + while (!applicationFailed(_applicationInfo) && process.isAlive) { if (!appStatusFirstUpdated) { - if (applicationInfo.isDefined) { + if (_applicationInfo.isDefined) { setStateIfNotCanceled(OperationState.RUNNING) updateBatchMetadata() appStatusFirstUpdated = true @@ -243,54 +257,56 @@ class BatchJobSubmission( } } process.waitFor(applicationCheckInterval, TimeUnit.MILLISECONDS) - applicationInfo = currentApplicationInfo + _applicationInfo = currentApplicationInfo } - if (applicationFailed(applicationInfo)) { + if (applicationFailed(_applicationInfo)) { process.destroyForcibly() - throw new RuntimeException(s"Batch job failed: $applicationInfo") + throw new RuntimeException(s"Batch job failed: ${_applicationInfo}") } else { process.waitFor() if (process.exitValue() != 0) { throw new KyuubiException(s"Process exit with value ${process.exitValue()}") } - Option(applicationInfo.map(_.id)).foreach { + Option(_applicationInfo.map(_.id)).foreach { case Some(appId) => monitorBatchJob(appId) case _ => } } } finally { builder.close() + cleanupUploadedResourceIfNeeded() } } private def monitorBatchJob(appId: String): Unit = { info(s"Monitoring submitted $batchType batch[$batchId] job: $appId") - if (applicationInfo.isEmpty) { - applicationInfo = currentApplicationInfo + if (_applicationInfo.isEmpty) { + _applicationInfo = currentApplicationInfo } if (state == OperationState.PENDING) { setStateIfNotCanceled(OperationState.RUNNING) } - if (applicationInfo.isEmpty) { + if (_applicationInfo.isEmpty) { info(s"The $batchType batch[$batchId] job: $appId not found, assume that it has finished.") - } else if (applicationFailed(applicationInfo)) { - throw new RuntimeException(s"$batchType batch[$batchId] job failed: $applicationInfo") + } else if (applicationFailed(_applicationInfo)) { + throw new RuntimeException(s"$batchType batch[$batchId] job failed: ${_applicationInfo}") } else { updateBatchMetadata() // TODO: add limit for max batch job submission lifetime - while (applicationInfo.isDefined && !applicationTerminated(applicationInfo)) { + while (_applicationInfo.isDefined && !applicationTerminated(_applicationInfo)) { Thread.sleep(applicationCheckInterval) val newApplicationStatus = currentApplicationInfo - if (newApplicationStatus.map(_.state) != applicationInfo.map(_.state)) { - applicationInfo = newApplicationStatus - info(s"Batch report for $batchId, $applicationInfo") + if (newApplicationStatus.map(_.state) != _applicationInfo.map(_.state)) { + _applicationInfo = newApplicationStatus + updateBatchMetadata() + info(s"Batch report for $batchId, ${_applicationInfo}") } } - if (applicationFailed(applicationInfo)) { - throw new RuntimeException(s"$batchType batch[$batchId] job failed: $applicationInfo") + if (applicationFailed(_applicationInfo)) { + throw new RuntimeException(s"$batchType batch[$batchId] job failed: ${_applicationInfo}") } } } @@ -319,12 +335,14 @@ class BatchJobSubmission( if (isTerminalState(state)) { killMessage = (false, s"batch $batchId is already terminal so can not kill it.") builder.close() + cleanupUploadedResourceIfNeeded() return } try { killMessage = killBatchApplication() builder.close() + cleanupUploadedResourceIfNeeded() } finally { if (state == OperationState.INITIALIZED) { // if state is INITIALIZED, it means that the batch submission has not started to run, set @@ -355,6 +373,16 @@ class BatchJobSubmission( override def isTimedOut: Boolean = false override protected def eventEnabled: Boolean = true + + private def cleanupUploadedResourceIfNeeded(): Unit = { + if (session.isResourceUploaded) { + try { + Files.deleteIfExists(Paths.get(resource)) + } catch { + case e: Throwable => error(s"Error deleting the uploaded resource: $resource", e) + } + } + } } object BatchJobSubmission { diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/operation/ExecuteStatement.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/operation/ExecuteStatement.scala index 4e818355ec6..4767cbf121b 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/operation/ExecuteStatement.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/operation/ExecuteStatement.scala @@ -19,11 +19,13 @@ package org.apache.kyuubi.operation import scala.collection.JavaConverters._ +import com.codahale.metrics.MetricRegistry import org.apache.hive.service.rpc.thrift.{TGetOperationStatusResp, TOperationState, TProtocolVersion} import org.apache.hive.service.rpc.thrift.TOperationState._ import org.apache.kyuubi.KyuubiSQLException import org.apache.kyuubi.config.KyuubiConf +import org.apache.kyuubi.metrics.{MetricsConstants, MetricsSystem} import org.apache.kyuubi.operation.FetchOrientation.FETCH_NEXT import org.apache.kyuubi.operation.log.OperationLog import org.apache.kyuubi.session.Session @@ -61,7 +63,8 @@ class ExecuteStatement( // We need to avoid executing query in sync mode, because there is no heartbeat mechanism // in thrift protocol, in sync mode, we cannot distinguish between long-run query and // engine crash without response before socket read timeout. - _remoteOpHandle = client.executeStatement(statement, confOverlay, true, queryTimeout) + _remoteOpHandle = + client.executeStatement(statement, confOverlay ++ operationHandleConf, true, queryTimeout) setHasResultSet(_remoteOpHandle.isHasResultSet) } catch onError() } @@ -131,6 +134,12 @@ class ExecuteStatement( } sendCredentialsIfNeeded() } + MetricsSystem.tracing { ms => + val execTime = System.currentTimeMillis() - startTime + ms.updateHistogram( + MetricRegistry.name(MetricsConstants.OPERATION_EXEC_TIME, opType), + execTime) + } // see if anymore log could be fetched fetchQueryLog() } catch onError() diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/operation/KyuubiApplicationOperation.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/operation/KyuubiApplicationOperation.scala index cf10b2da41a..605c4cca6b8 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/operation/KyuubiApplicationOperation.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/operation/KyuubiApplicationOperation.scala @@ -31,7 +31,11 @@ import org.apache.kyuubi.util.ThriftUtils abstract class KyuubiApplicationOperation(session: Session) extends KyuubiOperation(session) { - private[kyuubi] def currentApplicationInfo: Option[ApplicationInfo] + protected def currentApplicationInfo: Option[ApplicationInfo] + + protected def applicationInfoMap: Option[Map[String, String]] = { + currentApplicationInfo.map(_.toMap) + } override def getResultSetMetadata: TGetResultSetMetadataResp = { val schema = new TTableSchema() @@ -51,7 +55,7 @@ abstract class KyuubiApplicationOperation(session: Session) extends KyuubiOperat } override def getNextRowSet(order: FetchOrientation, rowSetSize: Int): TRowSet = { - currentApplicationInfo.map(_.toMap).map { state => + applicationInfoMap.map { state => val tRow = new TRowSet(0, new JArrayList[TRow](state.size)) Seq(state.keys, state.values.map(Option(_).getOrElse(""))).map(_.toSeq.asJava).foreach { col => diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/operation/KyuubiOperation.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/operation/KyuubiOperation.scala index 638985ea12b..106a11e4b25 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/operation/KyuubiOperation.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/operation/KyuubiOperation.scala @@ -26,6 +26,7 @@ import org.apache.thrift.TException import org.apache.thrift.transport.TTransportException import org.apache.kyuubi.{KyuubiSQLException, Utils} +import org.apache.kyuubi.config.KyuubiReservedKeys.KYUUBI_OPERATION_HANDLE_KEY import org.apache.kyuubi.events.{EventBus, KyuubiOperationEvent} import org.apache.kyuubi.metrics.MetricsConstants.{OPERATION_FAIL, OPERATION_OPEN, OPERATION_STATE, OPERATION_TOTAL} import org.apache.kyuubi.metrics.MetricsSystem @@ -46,6 +47,8 @@ abstract class KyuubiOperation(session: Session) extends AbstractOperation(sessi protected[operation] lazy val client = session.asInstanceOf[KyuubiSessionImpl].client + protected val operationHandleConf = Map(KYUUBI_OPERATION_HANDLE_KEY -> handle.identifier.toString) + @volatile protected var _remoteOpHandle: TOperationHandle = _ def remoteOpHandle(): TOperationHandle = _remoteOpHandle @@ -176,7 +179,9 @@ abstract class KyuubiOperation(session: Session) extends AbstractOperation(sessi override def setState(newState: OperationState): Unit = { MetricsSystem.tracing { ms => - ms.markMeter(MetricRegistry.name(OPERATION_STATE, opType, state.toString.toLowerCase), -1) + if (!OperationState.isTerminal(state)) { + ms.markMeter(MetricRegistry.name(OPERATION_STATE, opType, state.toString.toLowerCase), -1) + } ms.markMeter(MetricRegistry.name(OPERATION_STATE, opType, newState.toString.toLowerCase)) ms.markMeter(MetricRegistry.name(OPERATION_STATE, newState.toString.toLowerCase)) } diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/operation/LaunchEngine.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/operation/LaunchEngine.scala index 0444b92fd81..fb4f39e262b 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/operation/LaunchEngine.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/operation/LaunchEngine.scala @@ -33,7 +33,7 @@ class LaunchEngine(session: KyuubiSessionImpl, override val shouldRunAsync: Bool } override def getOperationLog: Option[OperationLog] = Option(_operationLog) - override private[kyuubi] def currentApplicationInfo: Option[ApplicationInfo] = { + override protected def currentApplicationInfo: Option[ApplicationInfo] = { Option(client).map { cli => ApplicationInfo( cli.engineId.orNull, @@ -68,4 +68,9 @@ class LaunchEngine(session: KyuubiSessionImpl, override val shouldRunAsync: Bool if (!shouldRunAsync) getBackgroundHandle.get() } + + override protected def applicationInfoMap: Option[Map[String, String]] = { + super.applicationInfoMap.map { _ + ("refId" -> session.engine.getEngineRefId()) } + } + } diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/BackendServiceMetric.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/BackendServiceMetric.scala index d8b66416375..68bf11d7f99 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/BackendServiceMetric.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/BackendServiceMetric.scala @@ -152,9 +152,11 @@ trait BackendServiceMetric extends BackendService { } } - abstract override def getOperationStatus(operationHandle: OperationHandle): OperationStatus = { + abstract override def getOperationStatus( + operationHandle: OperationHandle, + maxWait: Option[Long] = None): OperationStatus = { MetricsSystem.timerTracing(MetricsConstants.BS_GET_OPERATION_STATUS) { - super.getOperationStatus(operationHandle) + super.getOperationStatus(operationHandle, maxWait) } } diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/KyuubiRestFrontendService.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/KyuubiRestFrontendService.scala index 29f4cf30419..cd191afe834 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/KyuubiRestFrontendService.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/KyuubiRestFrontendService.scala @@ -26,14 +26,14 @@ import javax.ws.rs.core.Response.Status import com.google.common.annotations.VisibleForTesting import org.apache.hadoop.conf.Configuration -import org.eclipse.jetty.servlet.FilterHolder +import org.eclipse.jetty.servlet.{ErrorPageErrorHandler, FilterHolder} import org.apache.kyuubi.{KyuubiException, Utils} import org.apache.kyuubi.config.KyuubiConf -import org.apache.kyuubi.config.KyuubiConf.{FRONTEND_REST_BIND_HOST, FRONTEND_REST_BIND_PORT, FRONTEND_REST_MAX_WORKER_THREADS, METADATA_RECOVERY_THREADS} +import org.apache.kyuubi.config.KyuubiConf._ import org.apache.kyuubi.server.api.v1.ApiRootResource import org.apache.kyuubi.server.http.authentication.{AuthenticationFilter, KyuubiHttpAuthenticationFactory} -import org.apache.kyuubi.server.ui.JettyServer +import org.apache.kyuubi.server.ui.{JettyServer, JettyUtils} import org.apache.kyuubi.service.{AbstractFrontendService, Serverable, Service, ServiceUtils} import org.apache.kyuubi.service.authentication.KyuubiAuthenticationFactory import org.apache.kyuubi.session.{KyuubiSessionManager, SessionHandle} @@ -58,7 +58,10 @@ class KyuubiRestFrontendService(override val serverable: Serverable) lazy val host: String = conf.get(FRONTEND_REST_BIND_HOST) .getOrElse { - if (conf.get(KyuubiConf.FRONTEND_CONNECTION_URL_USE_HOSTNAME)) { + if (Utils.isWindows || Utils.isMac) { + warn(s"Kyuubi Server run in Windows or Mac environment, binding $getName to 0.0.0.0") + "0.0.0.0" + } else if (conf.get(KyuubiConf.FRONTEND_CONNECTION_URL_USE_HOSTNAME)) { Utils.findLocalInetAddress.getCanonicalHostName } else { Utils.findLocalInetAddress.getHostAddress @@ -95,6 +98,18 @@ class KyuubiRestFrontendService(override val serverable: Serverable) server.addRedirectHandler("/docs", "/swagger/") server.addRedirectHandler("/docs/", "/swagger/") server.addRedirectHandler("/swagger", "/swagger/") + + installWebUI() + } + + private def installWebUI(): Unit = { + val servletHandler = JettyUtils.createStaticHandler("dist", "/ui") + // HTML5 Web History Mode requires redirect any url path under Web UI Servlet to the main page. + // See more details at https://router.vuejs.org/guide/essentials/history-mode.html#html5-mode + val errorHandler = new ErrorPageErrorHandler + errorHandler.addErrorPage(404, "/") + servletHandler.setErrorHandler(errorHandler) + server.addHandler(servletHandler) } private def startBatchChecker(): Unit = { @@ -162,7 +177,6 @@ class KyuubiRestFrontendService(override val serverable: Serverable) server.start() recoverBatchSessions() isStarted.set(true) - info(s"$getName has started at ${server.getServerUri}") startBatchChecker() startInternal() } catch { @@ -170,6 +184,7 @@ class KyuubiRestFrontendService(override val serverable: Serverable) } } super.start() + info(s"Exposing REST endpoint at: http://${server.getServerUri}") } override def stop(): Unit = synchronized { diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/KyuubiServer.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/KyuubiServer.scala index 731ad5df629..a7f2e817837 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/KyuubiServer.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/KyuubiServer.scala @@ -33,6 +33,7 @@ import org.apache.kyuubi.ha.client.{AuthTypes, ServiceDiscovery} import org.apache.kyuubi.metrics.{MetricsConf, MetricsSystem} import org.apache.kyuubi.server.metadata.jdbc.JDBCMetadataStoreConf import org.apache.kyuubi.service.{AbstractBackendService, AbstractFrontendService, Serverable, ServiceState} +import org.apache.kyuubi.session.KyuubiSessionManager import org.apache.kyuubi.util.{KyuubiHadoopUtils, SignalRegister} import org.apache.kyuubi.zookeeper.EmbeddedZookeeper @@ -82,7 +83,7 @@ object KyuubiServer extends Logging { | /\___/ | \/__/ """.stripMargin) - info(s"Version: $KYUUBI_VERSION, Revision: $REVISION, Branch: $BRANCH," + + info(s"Version: $KYUUBI_VERSION, Revision: $REVISION ($REVISION_TIME), Branch: $BRANCH," + s" Java: $JAVA_COMPILE_VERSION, Scala: $SCALA_COMPILE_VERSION," + s" Spark: $SPARK_COMPILE_VERSION, Hadoop: $HADOOP_COMPILE_VERSION," + s" Hive: $HIVE_COMPILE_VERSION, Flink: $FLINK_COMPILE_VERSION," + @@ -128,6 +129,14 @@ object KyuubiServer extends Logging { info(s"Refreshed user defaults configs with changes of " + s"unset: $unsetCount, updated: $updatedCount, added: $addedCount") } + + private[kyuubi] def refreshUnlimitedUsers(): Unit = synchronized { + val sessionMgr = kyuubiServer.backendService.sessionManager.asInstanceOf[KyuubiSessionManager] + val existingUnlimitedUsers = sessionMgr.getUnlimitedUsers() + sessionMgr.refreshUnlimitedUsers(KyuubiConf().loadFileDefaults()) + val refreshedUnlimitedUsers = sessionMgr.getUnlimitedUsers() + info(s"Refreshed unlimited users from $existingUnlimitedUsers to $refreshedUnlimitedUsers") + } } class KyuubiServer(name: String) extends Serverable(name) { @@ -148,7 +157,7 @@ class KyuubiServer(name: String) extends Serverable(name) { warn("MYSQL frontend protocol is experimental.") new KyuubiMySQLFrontendService(this) case TRINO => - warn("Trio frontend protocol is experimental.") + warn("Trino frontend protocol is experimental.") new KyuubiTrinoFrontendService(this) case other => throw new UnsupportedOperationException(s"Frontend protocol $other is not supported yet.") @@ -160,6 +169,9 @@ class KyuubiServer(name: String) extends Serverable(name) { val kinit = new KinitAuxiliaryService() addService(kinit) + val periodicGCService = new PeriodicGCService + addService(periodicGCService) + if (conf.get(MetricsConf.METRICS_ENABLED)) { addService(new MetricsSystem) } diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/KyuubiTrinoFrontendService.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/KyuubiTrinoFrontendService.scala index fca8b8a8787..573bb948f90 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/KyuubiTrinoFrontendService.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/KyuubiTrinoFrontendService.scala @@ -64,15 +64,6 @@ class KyuubiTrinoFrontendService(override val serverable: Serverable) private def startInternal(): Unit = { val contextHandler = ApiRootResource.getServletHandler(this) server.addHandler(contextHandler) - - server.addStaticHandler("org/apache/kyuubi/ui/static", "/static/") - server.addRedirectHandler("/", "/static/") - server.addRedirectHandler("/static", "/static/") - server.addStaticHandler("META-INF/resources/webjars/swagger-ui/4.9.1/", "/swagger-static/") - server.addStaticHandler("org/apache/kyuubi/ui/swagger", "/swagger/") - server.addRedirectHandler("/docs", "/swagger/") - server.addRedirectHandler("/docs/", "/swagger/") - server.addRedirectHandler("/swagger", "/swagger/") } override def start(): Unit = synchronized { diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/PeriodicGCService.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/PeriodicGCService.scala new file mode 100644 index 00000000000..a4035b689d5 --- /dev/null +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/PeriodicGCService.scala @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.server + +import java.util.concurrent.TimeUnit + +import org.apache.kyuubi.config.KyuubiConf +import org.apache.kyuubi.service.AbstractService +import org.apache.kyuubi.util.ThreadUtils + +class PeriodicGCService(name: String) extends AbstractService(name) { + def this() = this(classOf[PeriodicGCService].getSimpleName) + + private val gcTrigger = ThreadUtils.newDaemonSingleThreadScheduledExecutor("periodic-gc-trigger") + + override def start(): Unit = { + startGcTrigger() + super.start() + } + + override def stop(): Unit = { + super.stop() + ThreadUtils.shutdown(gcTrigger) + } + + private def startGcTrigger(): Unit = { + val interval = conf.get(KyuubiConf.SERVER_PERIODIC_GC_INTERVAL) + gcTrigger.scheduleWithFixedDelay(() => System.gc(), interval, interval, TimeUnit.MILLISECONDS) + } +} diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/api/ApiUtils.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/api/ApiUtils.scala new file mode 100644 index 00000000000..ebbf04c9073 --- /dev/null +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/api/ApiUtils.scala @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.server.api + +import scala.collection.JavaConverters._ + +import org.apache.kyuubi.Utils +import org.apache.kyuubi.client.api.v1.dto.{OperationData, SessionData} +import org.apache.kyuubi.events.KyuubiOperationEvent +import org.apache.kyuubi.operation.KyuubiOperation +import org.apache.kyuubi.session.KyuubiSession + +object ApiUtils { + + def sessionData(session: KyuubiSession): SessionData = { + val sessionEvent = session.getSessionEvent + new SessionData( + session.handle.identifier.toString, + session.user, + session.ipAddress, + session.conf.asJava, + session.createTime, + session.lastAccessTime - session.createTime, + session.getNoOperationTime, + sessionEvent.flatMap(_.exception).map(Utils.prettyPrint).getOrElse(""), + session.sessionType.toString, + session.connectionUrl, + sessionEvent.map(_.engineId).getOrElse("")) + } + + def operationData(operation: KyuubiOperation): OperationData = { + val opEvent = KyuubiOperationEvent(operation) + new OperationData( + opEvent.statementId, + opEvent.statement, + opEvent.state, + opEvent.createTime, + opEvent.startTime, + opEvent.completeTime, + opEvent.exception.map(Utils.prettyPrint).getOrElse(""), + opEvent.sessionId, + opEvent.sessionUser, + opEvent.sessionType, + operation.getSession.asInstanceOf[KyuubiSession].connectionUrl) + } +} diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/api/OpenAPIConfig.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/api/OpenAPIConfig.scala index c4733a0b0e4..d8b48965638 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/api/OpenAPIConfig.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/api/OpenAPIConfig.scala @@ -17,6 +17,7 @@ package org.apache.kyuubi.server.api +import org.glassfish.jersey.media.multipart.MultiPartFeature import org.glassfish.jersey.server.ResourceConfig import org.apache.kyuubi.server.api.v1.KyuubiOpenApiResource @@ -26,4 +27,5 @@ class OpenAPIConfig extends ResourceConfig { register(classOf[KyuubiOpenApiResource]) register(classOf[KyuubiScalaObjectMapper]) register(classOf[RestExceptionMapper]) + register(classOf[MultiPartFeature]) } diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/api/v1/AdminResource.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/api/v1/AdminResource.scala index a92992e66f4..0d8b31b2c65 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/api/v1/AdminResource.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/api/v1/AdminResource.scala @@ -24,29 +24,33 @@ import javax.ws.rs.core.{MediaType, Response} import scala.collection.JavaConverters._ import scala.collection.mutable.ListBuffer -import io.swagger.v3.oas.annotations.media.Content +import io.swagger.v3.oas.annotations.media.{ArraySchema, Content, Schema} import io.swagger.v3.oas.annotations.responses.ApiResponse import io.swagger.v3.oas.annotations.tags.Tag +import org.apache.commons.lang3.StringUtils +import org.apache.zookeeper.KeeperException.NoNodeException import org.apache.kyuubi.{KYUUBI_VERSION, Logging, Utils} -import org.apache.kyuubi.client.api.v1.dto.Engine +import org.apache.kyuubi.client.api.v1.dto.{Engine, OperationData, SessionData} import org.apache.kyuubi.config.KyuubiConf import org.apache.kyuubi.config.KyuubiConf._ import org.apache.kyuubi.ha.HighAvailabilityConf.HA_NAMESPACE import org.apache.kyuubi.ha.client.{DiscoveryPaths, ServiceNodeInfo} import org.apache.kyuubi.ha.client.DiscoveryClientProvider.withDiscoveryClient +import org.apache.kyuubi.operation.{KyuubiOperation, OperationHandle} import org.apache.kyuubi.server.KyuubiServer -import org.apache.kyuubi.server.api.ApiRequestContext +import org.apache.kyuubi.server.api.{ApiRequestContext, ApiUtils} +import org.apache.kyuubi.session.{KyuubiSession, SessionHandle} @Tag(name = "Admin") @Produces(Array(MediaType.APPLICATION_JSON)) private[v1] class AdminResource extends ApiRequestContext with Logging { - private lazy val administrator = Utils.currentUser + private lazy val administrators = fe.getConf.get(KyuubiConf.SERVER_ADMINISTRATORS).toSet + + Utils.currentUser @ApiResponse( responseCode = "200", - content = Array(new Content( - mediaType = MediaType.APPLICATION_JSON)), + content = Array(new Content(mediaType = MediaType.APPLICATION_JSON)), description = "refresh the Kyuubi server hadoop conf, note that, " + "it only takes affect for frontend services now") @POST @@ -55,7 +59,7 @@ private[v1] class AdminResource extends ApiRequestContext with Logging { val userName = fe.getSessionUser(Map.empty[String, String]) val ipAddress = fe.getIpAddress info(s"Receive refresh Kyuubi server hadoop conf request from $userName/$ipAddress") - if (!userName.equals(administrator)) { + if (!isAdministrator(userName)) { throw new NotAllowedException( s"$userName is not allowed to refresh the Kyuubi server hadoop conf") } @@ -66,8 +70,7 @@ private[v1] class AdminResource extends ApiRequestContext with Logging { @ApiResponse( responseCode = "200", - content = Array(new Content( - mediaType = MediaType.APPLICATION_JSON)), + content = Array(new Content(mediaType = MediaType.APPLICATION_JSON)), description = "refresh the user defaults configs") @POST @Path("refresh/user_defaults_conf") @@ -75,7 +78,7 @@ private[v1] class AdminResource extends ApiRequestContext with Logging { val userName = fe.getSessionUser(Map.empty[String, String]) val ipAddress = fe.getIpAddress info(s"Receive refresh user defaults conf request from $userName/$ipAddress") - if (!userName.equals(administrator)) { + if (!isAdministrator(userName)) { throw new NotAllowedException( s"$userName is not allowed to refresh the user defaults conf") } @@ -84,10 +87,124 @@ private[v1] class AdminResource extends ApiRequestContext with Logging { Response.ok(s"Refresh the user defaults conf successfully.").build() } + @ApiResponse( + responseCode = "200", + content = Array(new Content(mediaType = MediaType.APPLICATION_JSON)), + description = "refresh the unlimited users") + @POST + @Path("refresh/unlimited_users") + def refreshUnlimitedUser(): Response = { + val userName = fe.getSessionUser(Map.empty[String, String]) + val ipAddress = fe.getIpAddress + info(s"Receive refresh unlimited users request from $userName/$ipAddress") + if (!isAdministrator(userName)) { + throw new NotAllowedException( + s"$userName is not allowed to refresh the unlimited users") + } + info(s"Reloading unlimited users") + KyuubiServer.refreshUnlimitedUsers() + Response.ok(s"Refresh the unlimited users successfully.").build() + } + + @ApiResponse( + responseCode = "200", + content = Array(new Content( + mediaType = MediaType.APPLICATION_JSON, + array = new ArraySchema(schema = new Schema(implementation = classOf[SessionData])))), + description = "get the list of all live sessions") + @GET + @Path("sessions") + def sessions(@QueryParam("users") users: String): Seq[SessionData] = { + val userName = fe.getSessionUser(Map.empty[String, String]) + val ipAddress = fe.getIpAddress + info(s"Received listing all live sessions request from $userName/$ipAddress") + if (!isAdministrator(userName)) { + throw new NotAllowedException( + s"$userName is not allowed to list all live sessions") + } + var sessions = fe.be.sessionManager.allSessions() + if (StringUtils.isNotBlank(users)) { + val usersSet = users.split(",").toSet + sessions = sessions.filter(session => usersSet.contains(session.user)) + } + sessions.map { case session => + ApiUtils.sessionData(session.asInstanceOf[KyuubiSession]) + }.toSeq + } + + @ApiResponse( + responseCode = "200", + content = Array(new Content(mediaType = MediaType.APPLICATION_JSON)), + description = "Close a session") + @DELETE + @Path("sessions/{sessionHandle}") + def closeSession(@PathParam("sessionHandle") sessionHandleStr: String): Response = { + val userName = fe.getSessionUser(Map.empty[String, String]) + val ipAddress = fe.getIpAddress + info(s"Received closing a session request from $userName/$ipAddress") + if (!isAdministrator(userName)) { + throw new NotAllowedException( + s"$userName is not allowed to close the session $sessionHandleStr") + } + fe.be.closeSession(SessionHandle.fromUUID(sessionHandleStr)) + Response.ok(s"Session $sessionHandleStr is closed successfully.").build() + } + @ApiResponse( responseCode = "200", content = Array(new Content( - mediaType = MediaType.APPLICATION_JSON)), + mediaType = MediaType.APPLICATION_JSON, + array = new ArraySchema(schema = new Schema(implementation = + classOf[OperationData])))), + description = + "get the list of all active operations") + @GET + @Path("operations") + def listOperations( + @QueryParam("users") users: String, + @QueryParam("sessionHandle") sessionHandle: String): Seq[OperationData] = { + val userName = fe.getSessionUser(Map.empty[String, String]) + val ipAddress = fe.getIpAddress + info(s"Received listing all of the active operations request from $userName/$ipAddress") + if (!isAdministrator(userName)) { + throw new NotAllowedException( + s"$userName is not allowed to list all the operations") + } + var operations = fe.be.sessionManager.operationManager.allOperations() + if (StringUtils.isNotBlank(users)) { + val usersSet = users.split(",").toSet + operations = operations.filter(operation => usersSet.contains(operation.getSession.user)) + } + if (StringUtils.isNotBlank(sessionHandle)) { + operations = operations.filter(operation => + operation.getSession.handle.equals(SessionHandle.fromUUID(sessionHandle))) + } + operations + .map(operation => ApiUtils.operationData(operation.asInstanceOf[KyuubiOperation])).toSeq + } + + @ApiResponse( + responseCode = "200", + content = Array(new Content(mediaType = MediaType.APPLICATION_JSON)), + description = "close an operation") + @DELETE + @Path("operations/{operationHandle}") + def closeOperation(@PathParam("operationHandle") operationHandleStr: String): Response = { + val userName = fe.getSessionUser(Map.empty[String, String]) + val ipAddress = fe.getIpAddress + info(s"Received close an operation request from $userName/$ipAddress") + if (!isAdministrator(userName)) { + throw new NotAllowedException( + s"$userName is not allowed to close the operation $operationHandleStr") + } + val operationHandle = OperationHandle(operationHandleStr) + fe.be.closeOperation(operationHandle) + Response.ok(s"Operation $operationHandleStr is closed successfully.").build() + } + + @ApiResponse( + responseCode = "200", + content = Array(new Content(mediaType = MediaType.APPLICATION_JSON)), description = "delete kyuubi engine") @DELETE @Path("engine") @@ -96,7 +213,11 @@ private[v1] class AdminResource extends ApiRequestContext with Logging { @QueryParam("sharelevel") shareLevel: String, @QueryParam("subdomain") subdomain: String, @QueryParam("hive.server2.proxy.user") hs2ProxyUser: String): Response = { - val userName = fe.getSessionUser(hs2ProxyUser) + val userName = if (isAdministrator(fe.getRealUser())) { + Option(hs2ProxyUser).getOrElse(fe.getRealUser()) + } else { + fe.getSessionUser(hs2ProxyUser) + } val engine = getEngine(userName, engineType, shareLevel, subdomain, "default") val engineSpace = getEngineSpace(engine) @@ -121,8 +242,7 @@ private[v1] class AdminResource extends ApiRequestContext with Logging { @ApiResponse( responseCode = "200", - content = Array(new Content( - mediaType = MediaType.APPLICATION_JSON)), + content = Array(new Content(mediaType = MediaType.APPLICATION_JSON)), description = "list kyuubi engines") @GET @Path("engine") @@ -131,7 +251,11 @@ private[v1] class AdminResource extends ApiRequestContext with Logging { @QueryParam("sharelevel") shareLevel: String, @QueryParam("subdomain") subdomain: String, @QueryParam("hive.server2.proxy.user") hs2ProxyUser: String): Seq[Engine] = { - val userName = fe.getSessionUser(hs2ProxyUser) + val userName = if (isAdministrator(fe.getRealUser())) { + Option(hs2ProxyUser).getOrElse(fe.getRealUser()) + } else { + fe.getSessionUser(hs2ProxyUser) + } val engine = getEngine(userName, engineType, shareLevel, subdomain, "") val engineSpace = getEngineSpace(engine) @@ -144,9 +268,19 @@ private[v1] class AdminResource extends ApiRequestContext with Logging { } case None => withDiscoveryClient(fe.getConf) { discoveryClient => - discoveryClient.getChildren(engineSpace).map { child => - info(s"Listing engine nodes for $engineSpace/$child") - engineNodes ++= discoveryClient.getServiceNodesInfo(s"$engineSpace/$child") + try { + discoveryClient.getChildren(engineSpace).map { child => + info(s"Listing engine nodes for $engineSpace/$child") + engineNodes ++= discoveryClient.getServiceNodesInfo(s"$engineSpace/$child") + } + } catch { + case nne: NoNodeException => + error( + s"No such engine for user: $userName, " + + s"engine type: $engineType, share level: $shareLevel, subdomain: $subdomain", + nne) + throw new NotFoundException(s"No such engine for user: $userName, " + + s"engine type: $engineType, share level: $shareLevel, subdomain: $subdomain") } } } @@ -197,4 +331,8 @@ private[v1] class AdminResource extends ApiRequestContext with Logging { engine.getUser, engine.getSubdomain) } + + private def isAdministrator(userName: String): Boolean = { + administrators.contains(userName); + } } diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/api/v1/ApiRootResource.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/api/v1/ApiRootResource.scala index 0d91da868af..d8b997e865c 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/api/v1/ApiRootResource.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/api/v1/ApiRootResource.scala @@ -37,8 +37,7 @@ private[v1] class ApiRootResource extends ApiRequestContext { @ApiResponse( responseCode = "200", - content = Array(new Content( - mediaType = MediaType.APPLICATION_JSON)), + content = Array(new Content(mediaType = MediaType.APPLICATION_JSON)), description = "Get the version of Kyuubi server.") @GET @Path("version") diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/api/v1/BatchesResource.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/api/v1/BatchesResource.scala index 487362d96b1..4814996a4a1 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/api/v1/BatchesResource.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/api/v1/BatchesResource.scala @@ -17,31 +17,37 @@ package org.apache.kyuubi.server.api.v1 -import java.util.Locale +import java.io.InputStream +import java.util +import java.util.{Collections, Locale, UUID} import java.util.concurrent.ConcurrentHashMap import javax.ws.rs._ import javax.ws.rs.core.MediaType import javax.ws.rs.core.Response.Status import scala.collection.JavaConverters._ +import scala.util.{Failure, Success, Try} import scala.util.control.NonFatal import io.swagger.v3.oas.annotations.media.{Content, Schema} import io.swagger.v3.oas.annotations.responses.ApiResponse import io.swagger.v3.oas.annotations.tags.Tag +import org.glassfish.jersey.media.multipart.{FormDataContentDisposition, FormDataParam} import org.apache.kyuubi.{Logging, Utils} import org.apache.kyuubi.client.api.v1.dto._ import org.apache.kyuubi.client.exception.KyuubiRestException +import org.apache.kyuubi.client.util.BatchUtils._ import org.apache.kyuubi.config.KyuubiConf import org.apache.kyuubi.config.KyuubiReservedKeys._ -import org.apache.kyuubi.engine.ApplicationInfo +import org.apache.kyuubi.engine.{ApplicationInfo, KyuubiApplicationManager} import org.apache.kyuubi.operation.{BatchJobSubmission, FetchOrientation, OperationState} import org.apache.kyuubi.server.api.ApiRequestContext import org.apache.kyuubi.server.api.v1.BatchesResource._ import org.apache.kyuubi.server.metadata.MetadataManager import org.apache.kyuubi.server.metadata.api.Metadata import org.apache.kyuubi.session.{KyuubiBatchSessionImpl, KyuubiSessionManager, SessionHandle} +import org.apache.kyuubi.util.JdbcUtils @Tag(name = "Batch") @Produces(Array(MediaType.APPLICATION_JSON)) @@ -68,7 +74,7 @@ private[v1] class BatchesResource extends ApiRequestContext with Logging { private def buildBatch(session: KyuubiBatchSessionImpl): Batch = { val batchOp = session.batchJobSubmissionOp val batchOpStatus = batchOp.getStatus - val batchAppStatus = batchOp.currentApplicationInfo + val batchAppStatus = batchOp.getOrFetchCurrentApplicationInfo val name = Option(batchOp.batchName).getOrElse(batchAppStatus.map(_.name).orNull) var appId: String = null @@ -102,7 +108,8 @@ private[v1] class BatchesResource extends ApiRequestContext with Logging { session.connectionUrl, batchOpStatus.state.toString, session.createTime, - batchOpStatus.completed) + batchOpStatus.completed, + Map.empty[String, String].asJava) } private def buildBatch( @@ -139,7 +146,8 @@ private[v1] class BatchesResource extends ApiRequestContext with Logging { metadata.kyuubiInstance, currentBatchState, metadata.createTime, - metadata.endTime) + metadata.endTime, + Map.empty[String, String].asJava) }.getOrElse(MetadataManager.buildBatch(metadata)) } @@ -161,6 +169,46 @@ private[v1] class BatchesResource extends ApiRequestContext with Logging { @POST @Consumes(Array(MediaType.APPLICATION_JSON)) def openBatchSession(request: BatchRequest): Batch = { + openBatchSessionInternal(request) + } + + @ApiResponse( + responseCode = "200", + content = Array(new Content( + mediaType = MediaType.APPLICATION_JSON, + schema = new Schema(implementation = classOf[Batch]))), + description = "create and open a batch session with uploading resource file") + @POST + @Consumes(Array(MediaType.MULTIPART_FORM_DATA)) + def openBatchSessionWithUpload( + @FormDataParam("batchRequest") batchRequest: BatchRequest, + @FormDataParam("resourceFile") resourceFileInputStream: InputStream, + @FormDataParam("resourceFile") resourceFileMetadata: FormDataContentDisposition): Batch = { + require( + fe.getConf.get(KyuubiConf.BATCH_RESOURCE_UPLOAD_ENABLED), + "Batch resource upload function is not enabled.") + require( + batchRequest != null, + "batchRequest is required and please check the content type" + + " of batchRequest is application/json") + val tempFile = Utils.writeToTempFile( + resourceFileInputStream, + KyuubiApplicationManager.uploadWorkDir, + resourceFileMetadata.getFileName) + batchRequest.setResource(tempFile.getPath) + openBatchSessionInternal(batchRequest, isResourceFromUpload = true) + } + + /** + * open new batch session with request + * + * @param request instance of BatchRequest + * @param isResourceFromUpload whether to clean up temporary uploaded resource file + * in local path after execution + */ + private def openBatchSessionInternal( + request: BatchRequest, + isResourceFromUpload: Boolean = false): Batch = { require( supportedBatchType(request.getBatchType), s"${request.getBatchType} is not in the supported list: $SUPPORTED_BATCH_TYPES}") @@ -170,21 +218,55 @@ private[v1] class BatchesResource extends ApiRequestContext with Logging { } request.setBatchType(request.getBatchType.toUpperCase(Locale.ROOT)) - val userName = fe.getSessionUser(request.getConf.asScala.toMap) - val ipAddress = fe.getIpAddress - request.setConf( - (request.getConf.asScala ++ Map( - KYUUBI_CLIENT_IP_KEY -> ipAddress, - KYUUBI_SERVER_IP_KEY -> fe.host, - KYUUBI_SESSION_CONNECTION_URL_KEY -> fe.connectionUrl, - KYUUBI_SESSION_REAL_USER_KEY -> fe.getRealUser())).asJava) - val sessionHandle = sessionManager.openBatchSession( - userName, - "anonymous", - ipAddress, - request.getConf.asScala.toMap, - request) - buildBatch(sessionManager.getBatchSessionImpl(sessionHandle)) + val userProvidedBatchId = request.getConf.asScala.get(KYUUBI_BATCH_ID_KEY) + userProvidedBatchId.foreach { batchId => + try UUID.fromString(batchId) + catch { + case NonFatal(e) => + throw new IllegalArgumentException(s"$KYUUBI_BATCH_ID_KEY=$batchId must be an UUID", e) + } + } + + userProvidedBatchId.flatMap { batchId => + Option(sessionManager.getBatchFromMetadataStore(batchId)) + } match { + case Some(batch) => + markDuplicated(batch) + case None => + val userName = fe.getSessionUser(request.getConf.asScala.toMap) + val ipAddress = fe.getIpAddress + val batchId = userProvidedBatchId.getOrElse(UUID.randomUUID().toString) + request.setConf( + (request.getConf.asScala ++ Map( + KYUUBI_BATCH_ID_KEY -> batchId, + KYUUBI_BATCH_RESOURCE_UPLOADED_KEY -> isResourceFromUpload.toString, + KYUUBI_CLIENT_IP_KEY -> ipAddress, + KYUUBI_SERVER_IP_KEY -> fe.host, + KYUUBI_SESSION_CONNECTION_URL_KEY -> fe.connectionUrl, + KYUUBI_SESSION_REAL_USER_KEY -> fe.getRealUser())).asJava) + + Try { + sessionManager.openBatchSession( + userName, + "anonymous", + ipAddress, + request.getConf.asScala.toMap, + request) + } match { + case Success(sessionHandle) => + buildBatch(sessionManager.getBatchSessionImpl(sessionHandle)) + case Failure(cause) if JdbcUtils.isDuplicatedKeyDBErr(cause) => + val batch = sessionManager.getBatchFromMetadataStore(batchId) + assert(batch != null, s"can not find duplicated batch $batchId from metadata store") + markDuplicated(batch) + } + } + } + + private def markDuplicated(batch: Batch): Batch = { + warn(s"duplicated submission: ${batch.getId}, ignore and return the existing batch.") + batch.setBatchInfo(Map(KYUUBI_BATCH_DUPLICATED_KEY -> "true").asJava) + batch } @ApiResponse( @@ -214,7 +296,8 @@ private[v1] class BatchesResource extends ApiRequestContext with Logging { error(s"Error redirecting get batch[$batchId] to ${metadata.kyuubiInstance}", e) val batchAppStatus = sessionManager.applicationManager.getApplicationInfo( metadata.clusterManager, - batchId) + batchId, + Some(metadata.createTime)) buildBatch(metadata, batchAppStatus) } } @@ -278,12 +361,16 @@ private[v1] class BatchesResource extends ApiRequestContext with Logging { Option(sessionManager.getBatchSessionImpl(sessionHandle)).map { batchSession => try { val submissionOp = batchSession.batchJobSubmissionOp - val rowSet = submissionOp.getOperationLogRowSet( - FetchOrientation.FETCH_NEXT, - from, - size) - val logRowSet = rowSet.getColumns.get(0).getStringVal.getValues.asScala - new OperationLog(logRowSet.asJava, logRowSet.size) + val rowSet = submissionOp.getOperationLogRowSet(FetchOrientation.FETCH_NEXT, from, size) + val columns = rowSet.getColumns + val logRowSet: util.List[String] = + if (columns == null || columns.size == 0) { + Collections.emptyList() + } else { + assert(columns.size == 1) + columns.get(0).getStringVal.getValues + } + new OperationLog(logRowSet, logRowSet.size) } catch { case NonFatal(e) => val errorMsg = s"Error getting operation log for batchId: $batchId" diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/api/v1/OperationsResource.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/api/v1/OperationsResource.scala index b1b84c30801..70a6d3a2848 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/api/v1/OperationsResource.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/api/v1/OperationsResource.scala @@ -28,8 +28,7 @@ import io.swagger.v3.oas.annotations.responses.ApiResponse import io.swagger.v3.oas.annotations.tags.Tag import org.apache.hive.service.rpc.thrift._ -import org.apache.kyuubi.KyuubiSQLException -import org.apache.kyuubi.Logging +import org.apache.kyuubi.{KyuubiSQLException, Logging} import org.apache.kyuubi.client.api.v1.dto._ import org.apache.kyuubi.events.KyuubiOperationEvent import org.apache.kyuubi.operation.{FetchOrientation, KyuubiOperation, OperationHandle} @@ -37,6 +36,7 @@ import org.apache.kyuubi.server.api.ApiRequestContext @Tag(name = "Operation") @Produces(Array(MediaType.APPLICATION_JSON)) +@Consumes(Array(MediaType.APPLICATION_JSON)) private[v1] class OperationsResource extends ApiRequestContext with Logging { @ApiResponse( @@ -64,8 +64,7 @@ private[v1] class OperationsResource extends ApiRequestContext with Logging { @ApiResponse( responseCode = "200", - content = Array(new Content( - mediaType = MediaType.APPLICATION_JSON)), + content = Array(new Content(mediaType = MediaType.APPLICATION_JSON)), description = "apply an action for an operation") @PUT @@ -183,24 +182,55 @@ private[v1] class OperationsResource extends ApiRequestContext with Logging { i.getSetField.name(), i.getSetField match { case TColumnValue._Fields.STRING_VAL => - i.getStringVal.getFieldValue(TStringValue._Fields.VALUE) + if (i.getStringVal.isSetValue) { + i.getStringVal.getFieldValue(TStringValue._Fields.VALUE) + } else { + null + } case TColumnValue._Fields.BOOL_VAL => - i.getBoolVal.getFieldValue(TBoolValue._Fields.VALUE) + if (i.getBoolVal.isSetValue) { + i.getBoolVal.getFieldValue(TBoolValue._Fields.VALUE) + } else { + null + } case TColumnValue._Fields.BYTE_VAL => - i.getByteVal.getFieldValue(TByteValue._Fields.VALUE) + if (i.getByteVal.isSetValue) { + i.getByteVal.getFieldValue(TByteValue._Fields.VALUE) + } else { + null + } case TColumnValue._Fields.DOUBLE_VAL => - i.getDoubleVal.getFieldValue(TDoubleValue._Fields.VALUE) + if (i.getDoubleVal.isSetValue) { + i.getDoubleVal.getFieldValue(TDoubleValue._Fields.VALUE) + } else { + null + } case TColumnValue._Fields.I16_VAL => - i.getI16Val.getFieldValue(TI16Value._Fields.VALUE) + if (i.getI16Val.isSetValue) { + i.getI16Val.getFieldValue(TI16Value._Fields.VALUE) + } else { + null + } case TColumnValue._Fields.I32_VAL => - i.getI32Val.getFieldValue(TI32Value._Fields.VALUE) + if (i.getI32Val.isSetValue) { + i.getI32Val.getFieldValue(TI32Value._Fields.VALUE) + } else { + null + } case TColumnValue._Fields.I64_VAL => - i.getI64Val.getFieldValue(TI64Value._Fields.VALUE) + if (i.getI64Val.isSetValue) { + i.getI64Val.getFieldValue(TI64Value._Fields.VALUE) + } else { + null + } }) }).asJava) }) new ResultRowSet(rows.asJava, rows.size) } catch { + case e: IllegalArgumentException => + error(e.getMessage, e) + throw new BadRequestException(e.getMessage) case NonFatal(e) => val errorMsg = s"Error getting result row set for operation handle $operationHandleStr" error(errorMsg, e) diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/api/v1/SessionsResource.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/api/v1/SessionsResource.scala index 80212faf2c3..81d1a27092f 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/api/v1/SessionsResource.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/api/v1/SessionsResource.scala @@ -33,14 +33,13 @@ import org.apache.kyuubi.Logging import org.apache.kyuubi.client.api.v1.dto import org.apache.kyuubi.client.api.v1.dto._ import org.apache.kyuubi.config.KyuubiReservedKeys._ -import org.apache.kyuubi.events.KyuubiEvent import org.apache.kyuubi.operation.OperationHandle -import org.apache.kyuubi.server.api.ApiRequestContext -import org.apache.kyuubi.session.KyuubiSession -import org.apache.kyuubi.session.SessionHandle +import org.apache.kyuubi.server.api.{ApiRequestContext, ApiUtils} +import org.apache.kyuubi.session.{KyuubiSession, SessionHandle} @Tag(name = "Session") @Produces(Array(MediaType.APPLICATION_JSON)) +@Consumes(Array(MediaType.APPLICATION_JSON)) private[v1] class SessionsResource extends ApiRequestContext with Logging { implicit def toSessionHandle(str: String): SessionHandle = SessionHandle.fromUUID(str) private def sessionManager = fe.be.sessionManager @@ -53,15 +52,8 @@ private[v1] class SessionsResource extends ApiRequestContext with Logging { description = "get the list of all live sessions") @GET def sessions(): Seq[SessionData] = { - sessionManager.allSessions().map { session => - new SessionData( - session.handle.identifier.toString, - session.user, - session.ipAddress, - session.conf.asJava, - session.createTime, - session.lastAccessTime - session.createTime, - session.getNoOperationTime) + sessionManager.allSessions().map { case session => + ApiUtils.sessionData(session.asInstanceOf[KyuubiSession]) }.toSeq } @@ -69,14 +61,32 @@ private[v1] class SessionsResource extends ApiRequestContext with Logging { responseCode = "200", content = Array(new Content( mediaType = MediaType.APPLICATION_JSON, - schema = new Schema(implementation = classOf[KyuubiEvent]))), + schema = new Schema(implementation = classOf[dto.KyuubiSessionEvent]))), description = "get a session event via session handle identifier") @GET @Path("{sessionHandle}") - def sessionInfo(@PathParam("sessionHandle") sessionHandleStr: String): KyuubiEvent = { + def sessionInfo(@PathParam("sessionHandle") sessionHandleStr: String): dto.KyuubiSessionEvent = { try { sessionManager.getSession(sessionHandleStr) - .asInstanceOf[KyuubiSession].getSessionEvent.get + .asInstanceOf[KyuubiSession].getSessionEvent.map(event => + dto.KyuubiSessionEvent.builder + .sessionId(event.sessionId) + .clientVersion(event.clientVersion) + .sessionType(event.sessionType) + .sessionName(event.sessionName) + .user(event.user) + .clientIp(event.clientIP) + .serverIp(event.serverIP) + .conf(event.conf.asJava) + .remoteSessionId(event.remoteSessionId) + .engineId(event.engineId) + .eventTime(event.eventTime) + .openedTime(event.openedTime) + .startTime(event.startTime) + .endTime(event.endTime) + .totalOperations(event.totalOperations) + .exception(event.exception.getOrElse(null)) + .build).get } catch { case NonFatal(e) => error(s"Invalid $sessionHandleStr", e) @@ -130,21 +140,20 @@ private[v1] class SessionsResource extends ApiRequestContext with Logging { def execPoolStatistic(): ExecPoolStatistic = { new ExecPoolStatistic( sessionManager.getExecPoolSize, - sessionManager.getActiveCount) + sessionManager.getActiveCount, + sessionManager.getWorkQueueSize) } @ApiResponse( responseCode = "200", - content = Array(new Content( - mediaType = MediaType.APPLICATION_JSON)), + content = Array(new Content(mediaType = MediaType.APPLICATION_JSON)), description = "Open(create) a session") @POST - @Consumes(Array(MediaType.APPLICATION_JSON)) def openSession(request: SessionOpenRequest): dto.SessionHandle = { val userName = fe.getSessionUser(request.getConfigs.asScala.toMap) val ipAddress = fe.getIpAddress val handle = fe.be.openSession( - TProtocolVersion.findByValue(request.getProtocolVersion), + SessionsResource.SESSION_PROTOCOL_VERSION, userName, "", ipAddress, @@ -158,8 +167,7 @@ private[v1] class SessionsResource extends ApiRequestContext with Logging { @ApiResponse( responseCode = "200", - content = Array(new Content( - mediaType = MediaType.APPLICATION_JSON)), + content = Array(new Content(mediaType = MediaType.APPLICATION_JSON)), description = "Close a session") @DELETE @Path("{sessionHandle}") @@ -183,7 +191,7 @@ private[v1] class SessionsResource extends ApiRequestContext with Logging { fe.be.executeStatement( sessionHandleStr, request.getStatement, - Map.empty, + request.getConfOverlay.asScala.toMap, request.isRunAsync, request.getQueryTimeout) } catch { @@ -406,3 +414,7 @@ private[v1] class SessionsResource extends ApiRequestContext with Logging { } } } + +object SessionsResource { + final val SESSION_PROTOCOL_VERSION = TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V1 +} diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/http/authentication/AuthenticationAuditLogger.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/http/authentication/AuthenticationAuditLogger.scala index ac1ee2a63a6..ac74c449bdf 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/http/authentication/AuthenticationAuditLogger.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/http/authentication/AuthenticationAuditLogger.scala @@ -35,6 +35,7 @@ object AuthenticationAuditLogger extends Logging { sb.append(s"proxyIp=${HTTP_PROXY_HEADER_CLIENT_IP_ADDRESS.get()}").append("\t") sb.append(s"method=${request.getMethod}").append("\t") sb.append(s"uri=${request.getRequestURI}").append("\t") + sb.append(s"params=${request.getQueryString}").append("\t") sb.append(s"protocol=${request.getProtocol}").append("\t") sb.append(s"status=${response.getStatus}") info(sb.toString()) diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/http/authentication/AuthenticationFilter.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/http/authentication/AuthenticationFilter.scala index 740937d8ec9..3c4065a7bdc 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/http/authentication/AuthenticationFilter.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/http/authentication/AuthenticationFilter.scala @@ -79,7 +79,6 @@ class AuthenticationFilter(conf: KyuubiConf) extends Filter with Logging { override def init(filterConfig: FilterConfig): Unit = { initAuthHandlers() - super.init(filterConfig) } private[kyuubi] def getMatchedHandler(authorization: String): Option[AuthenticationHandler] = { diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/metadata/MetadataManager.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/metadata/MetadataManager.scala index 5cecd2ab149..88a7f4e4ebd 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/metadata/MetadataManager.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/metadata/MetadataManager.scala @@ -20,6 +20,8 @@ package org.apache.kyuubi.server.metadata import java.util.concurrent.{ConcurrentHashMap, ThreadPoolExecutor, TimeUnit} import java.util.concurrent.atomic.AtomicInteger +import scala.collection.JavaConverters._ + import org.apache.kyuubi.{KyuubiException, Logging} import org.apache.kyuubi.client.api.v1.dto.Batch import org.apache.kyuubi.config.KyuubiConf @@ -29,7 +31,7 @@ import org.apache.kyuubi.operation.OperationState import org.apache.kyuubi.server.metadata.api.{Metadata, MetadataFilter} import org.apache.kyuubi.service.AbstractService import org.apache.kyuubi.session.SessionType -import org.apache.kyuubi.util.{ClassUtils, ThreadUtils} +import org.apache.kyuubi.util.{ClassUtils, JdbcUtils, ThreadUtils} class MetadataManager extends AbstractService("MetadataManager") { import MetadataManager._ @@ -37,44 +39,55 @@ class MetadataManager extends AbstractService("MetadataManager") { private var _metadataStore: MetadataStore = _ // Visible for testing. - private[metadata] val identifierRequestsRetryRefs = + private[metadata] val identifierRequestsAsyncRetryRefs = new ConcurrentHashMap[String, MetadataRequestsRetryRef]() // Visible for testing. - private[metadata] val identifierRequestsRetryingCounts = + private[metadata] val identifierRequestsAsyncRetryingCounts = new ConcurrentHashMap[String, AtomicInteger]() - private val requestsRetryTrigger = - ThreadUtils.newDaemonSingleThreadScheduledExecutor("metadata-requests-retry-trigger") + private lazy val requestsRetryInterval = + conf.get(KyuubiConf.METADATA_REQUEST_RETRY_INTERVAL) + + private lazy val requestsAsyncRetryEnabled = + conf.get(KyuubiConf.METADATA_REQUEST_ASYNC_RETRY_ENABLED) + + private lazy val requestsAsyncRetryTrigger = + ThreadUtils.newDaemonSingleThreadScheduledExecutor("metadata-requests-async-retry-trigger") - private var requestsRetryExecutor: ThreadPoolExecutor = _ + private lazy val requestsAsyncRetryExecutor: ThreadPoolExecutor = + ThreadUtils.newDaemonFixedThreadPool( + conf.get(KyuubiConf.METADATA_REQUEST_ASYNC_RETRY_THREADS), + "metadata-requests-async-retry") - private var maxMetadataRequestsRetryRefs: Int = _ + private lazy val cleanerEnabled = conf.get(KyuubiConf.METADATA_CLEANER_ENABLED) - private val metadataCleaner = + private lazy val metadataCleaner = ThreadUtils.newDaemonSingleThreadScheduledExecutor("metadata-cleaner") override def initialize(conf: KyuubiConf): Unit = { _metadataStore = MetadataManager.createMetadataStore(conf) - val retryExecutorNumThreads = - conf.get(KyuubiConf.METADATA_REQUEST_RETRY_THREADS) - requestsRetryExecutor = ThreadUtils.newDaemonFixedThreadPool( - retryExecutorNumThreads, - "metadata-requests-retry-executor") - maxMetadataRequestsRetryRefs = conf.get(KyuubiConf.METADATA_REQUEST_RETRY_QUEUE_SIZE) super.initialize(conf) } override def start(): Unit = { super.start() - startMetadataRequestsRetryTrigger() - startMetadataCleaner() + if (requestsAsyncRetryEnabled) { + startMetadataRequestsAsyncRetryTrigger() + } + if (cleanerEnabled) { + startMetadataCleaner() + } } override def stop(): Unit = { - ThreadUtils.shutdown(requestsRetryTrigger) - ThreadUtils.shutdown(requestsRetryExecutor) - ThreadUtils.shutdown(metadataCleaner) + if (requestsAsyncRetryEnabled) { + ThreadUtils.shutdown(requestsAsyncRetryTrigger) + ThreadUtils.shutdown(requestsAsyncRetryExecutor) + } + if (cleanerEnabled) { + ThreadUtils.shutdown(metadataCleaner) + } _metadataStore.close() super.stop() } @@ -93,11 +106,19 @@ class MetadataManager extends AbstractService("MetadataManager") { } } - def insertMetadata(metadata: Metadata, retryOnError: Boolean = true): Unit = { + protected def unrecoverableDBErr(cause: Throwable): Boolean = { + // cover other cases in the future + JdbcUtils.isDuplicatedKeyDBErr(cause) + } + + def insertMetadata(metadata: Metadata, asyncRetryOnError: Boolean = true): Unit = { try { withMetadataRequestMetrics(_metadataStore.insertMetadata(metadata)) } catch { - case e: Throwable if retryOnError => + // stop to retry when encounter duplicated key error. + case rethrow: Throwable if unrecoverableDBErr(rethrow) => + throw rethrow + case e: Throwable if requestsAsyncRetryEnabled && asyncRetryOnError => error(s"Error inserting metadata for session ${metadata.identifier}", e) addMetadataRetryRequest(InsertMetadata(metadata)) } @@ -156,11 +177,11 @@ class MetadataManager extends AbstractService("MetadataManager") { withMetadataRequestMetrics(_metadataStore.getMetadataList(filter, from, size, true)) } - def updateMetadata(metadata: Metadata, retryOnError: Boolean = true): Unit = { + def updateMetadata(metadata: Metadata, asyncRetryOnError: Boolean = true): Unit = { try { withMetadataRequestMetrics(_metadataStore.updateMetadata(metadata)) } catch { - case e: Throwable if retryOnError => + case e: Throwable if requestsAsyncRetryEnabled && asyncRetryOnError => error(s"Error updating metadata for session ${metadata.identifier}", e) addMetadataRetryRequest(UpdateMetadata(metadata)) } @@ -171,35 +192,33 @@ class MetadataManager extends AbstractService("MetadataManager") { } private def startMetadataCleaner(): Unit = { - val cleanerEnabled = conf.get(KyuubiConf.METADATA_CLEANER_ENABLED) val stateMaxAge = conf.get(METADATA_MAX_AGE) - - if (cleanerEnabled) { - val interval = conf.get(KyuubiConf.METADATA_CLEANER_INTERVAL) - val cleanerTask: Runnable = () => { - try { - withMetadataRequestMetrics(_metadataStore.cleanupMetadataByAge(stateMaxAge)) - } catch { - case e: Throwable => error("Error cleaning up the metadata by age", e) - } + val interval = conf.get(KyuubiConf.METADATA_CLEANER_INTERVAL) + val cleanerTask: Runnable = () => { + try { + withMetadataRequestMetrics(_metadataStore.cleanupMetadataByAge(stateMaxAge)) + } catch { + case e: Throwable => error("Error cleaning up the metadata by age", e) } - - metadataCleaner.scheduleWithFixedDelay( - cleanerTask, - interval, - interval, - TimeUnit.MILLISECONDS) } + + metadataCleaner.scheduleWithFixedDelay( + cleanerTask, + interval, + interval, + TimeUnit.MILLISECONDS) } def addMetadataRetryRequest(request: MetadataRequest): Unit = { - if (identifierRequestsRetryRefs.size() > maxMetadataRequestsRetryRefs) { + val maxRequestsAsyncRetryRefs: Int = + conf.get(KyuubiConf.METADATA_REQUEST_ASYNC_RETRY_QUEUE_SIZE) + if (identifierRequestsAsyncRetryRefs.size() > maxRequestsAsyncRetryRefs) { throw new KyuubiException( "The number of metadata requests retry instances exceeds the limitation:" + - maxMetadataRequestsRetryRefs) + maxRequestsAsyncRetryRefs) } val identifier = request.metadata.identifier - val ref = identifierRequestsRetryRefs.computeIfAbsent( + val ref = identifierRequestsAsyncRetryRefs.computeIfAbsent( identifier, identifier => { val ref = new MetadataRequestsRetryRef @@ -207,30 +226,29 @@ class MetadataManager extends AbstractService("MetadataManager") { ref }) ref.addRetryingMetadataRequest(request) - identifierRequestsRetryRefs.putIfAbsent(identifier, ref) + identifierRequestsAsyncRetryRefs.putIfAbsent(identifier, ref) MetricsSystem.tracing(_.markMeter(MetricsConstants.METADATA_REQUEST_RETRYING)) } def getMetadataRequestsRetryRef(identifier: String): MetadataRequestsRetryRef = { - identifierRequestsRetryRefs.get(identifier) + identifierRequestsAsyncRetryRefs.get(identifier) } def deRegisterRequestsRetryRef(identifier: String): Unit = { - identifierRequestsRetryRefs.remove(identifier) - identifierRequestsRetryingCounts.remove(identifier) + identifierRequestsAsyncRetryRefs.remove(identifier) + identifierRequestsAsyncRetryingCounts.remove(identifier) } - private def startMetadataRequestsRetryTrigger(): Unit = { - val interval = conf.get(KyuubiConf.METADATA_REQUEST_RETRY_INTERVAL) + private def startMetadataRequestsAsyncRetryTrigger(): Unit = { val triggerTask = new Runnable { override def run(): Unit = { - identifierRequestsRetryRefs.forEach { (id, ref) => + identifierRequestsAsyncRetryRefs.forEach { (id, ref) => if (!ref.hasRemainingRequests()) { - identifierRequestsRetryRefs.remove(id) - identifierRequestsRetryingCounts.remove(id) + identifierRequestsAsyncRetryRefs.remove(id) + identifierRequestsAsyncRetryingCounts.remove(id) } else { - val retryingCount = - identifierRequestsRetryingCounts.computeIfAbsent(id, _ => new AtomicInteger(0)) + val retryingCount = identifierRequestsAsyncRetryingCounts + .computeIfAbsent(id, _ => new AtomicInteger(0)) if (retryingCount.get() == 0) { val retryTask = new Runnable { @@ -241,12 +259,9 @@ class MetadataManager extends AbstractService("MetadataManager") { while (request != null) { request match { case insert: InsertMetadata => - insertMetadata(insert.metadata, retryOnError = false) - + insertMetadata(insert.metadata, asyncRetryOnError = false) case update: UpdateMetadata => - updateMetadata(update.metadata, retryOnError = false) - - case _ => + updateMetadata(update.metadata, asyncRetryOnError = false) } ref.metadataRequests.remove(request) MetricsSystem.tracing(_.markMeter( @@ -265,22 +280,21 @@ class MetadataManager extends AbstractService("MetadataManager") { try { retryingCount.incrementAndGet() - requestsRetryExecutor.submit(retryTask) + requestsAsyncRetryExecutor.submit(retryTask) } catch { case e: Throwable => error(s"Error submitting metadata retry requests for $id", e) retryingCount.decrementAndGet() } } - } } } } - requestsRetryTrigger.scheduleWithFixedDelay( + requestsAsyncRetryTrigger.scheduleWithFixedDelay( triggerTask, - interval, - interval, + requestsRetryInterval, + requestsRetryInterval, TimeUnit.MILLISECONDS) } } @@ -319,6 +333,7 @@ object MetadataManager extends Logging { batchMetadata.kyuubiInstance, batchState, batchMetadata.createTime, - batchMetadata.endTime) + batchMetadata.endTime, + Map.empty[String, String].asJava) } } diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/metadata/MetadataRequest.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/metadata/MetadataRequest.scala index dcee6466bad..2c121edfeb1 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/metadata/MetadataRequest.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/metadata/MetadataRequest.scala @@ -19,7 +19,7 @@ package org.apache.kyuubi.server.metadata import org.apache.kyuubi.server.metadata.api.Metadata -trait MetadataRequest { +sealed trait MetadataRequest { def metadata: Metadata } diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/metadata/jdbc/JDBCMetadataStore.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/metadata/jdbc/JDBCMetadataStore.scala index 151d846d8ca..488039e2baa 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/metadata/jdbc/JDBCMetadataStore.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/metadata/jdbc/JDBCMetadataStore.scala @@ -39,6 +39,7 @@ import org.apache.kyuubi.server.metadata.api.{Metadata, MetadataFilter} import org.apache.kyuubi.server.metadata.jdbc.DatabaseType._ import org.apache.kyuubi.server.metadata.jdbc.JDBCMetadataStoreConf._ import org.apache.kyuubi.session.SessionType +import org.apache.kyuubi.util.JdbcUtils class JDBCMetadataStore(conf: KyuubiConf) extends MetadataStore with Logging { import JDBCMetadataStore._ @@ -68,11 +69,10 @@ class JDBCMetadataStore(conf: KyuubiConf) extends MetadataStore with Logging { hikariConfig.setPoolName("jdbc-metadata-store-pool") @VisibleForTesting - private[kyuubi] val hikariDataSource = new HikariDataSource(hikariConfig) + implicit private[kyuubi] val hikariDataSource = new HikariDataSource(hikariConfig) private val mapper = new ObjectMapper().registerModule(DefaultScalaModule) - private val terminalStates = - OperationState.terminalStates.map(x => s"'${x.toString}'").mkString(", ") + private val terminalStates = OperationState.terminalStates.map(x => s"'$x'").mkString(", ") if (conf.get(METADATA_STORE_JDBC_DATABASE_SCHEMA_INIT)) { initSchema() @@ -81,7 +81,7 @@ class JDBCMetadataStore(conf: KyuubiConf) extends MetadataStore with Logging { private def initSchema(): Unit = { getInitSchema(dbType).foreach { schema => val ddlStatements = schema.trim.split(";") - withConnection() { connection => + JdbcUtils.withConnection { connection => Utils.tryLogNonFatalError { ddlStatements.foreach { ddlStatement => execute(connection, ddlStatement) @@ -96,37 +96,49 @@ class JDBCMetadataStore(conf: KyuubiConf) extends MetadataStore with Logging { private[jdbc] def getInitSchema(dbType: DatabaseType): Option[String] = { val classLoader = Utils.getContextOrKyuubiClassLoader val schemaPackage = s"sql/${dbType.toString.toLowerCase}" - val schemaUrlPattern = """^metadata-store-schema-(\d+)\.(\d+)\.(\d+)\.(.*)\.sql$""".r - val schemaUrls = ListBuffer[String]() - Option(classLoader.getResource(schemaPackage)).map(_.toURI).foreach { uri => + Option(classLoader.getResource(schemaPackage)).map(_.toURI).flatMap { uri => val pathNames = if (uri.getScheme == "jar") { val fs = FileSystems.newFileSystem(uri, Map.empty[String, AnyRef].asJava) try { Files.walk(fs.getPath(schemaPackage), 1).iterator().asScala.map( _.getFileName.toString).filter { name => - schemaUrlPattern.findFirstMatchIn(name).isDefined + SCHEMA_URL_PATTERN.findFirstMatchIn(name).isDefined }.toArray } finally { fs.close() } } else { Paths.get(uri).toFile.listFiles((_, name) => { - schemaUrlPattern.findFirstMatchIn(name).isDefined + SCHEMA_URL_PATTERN.findFirstMatchIn(name).isDefined }).map(_.getName) } - pathNames.foreach(name => schemaUrls += s"$schemaPackage/$name") + getLatestSchemaUrl(pathNames).map(name => s"$schemaPackage/$name").map { schemaUrl => + val inputStream = classLoader.getResourceAsStream(schemaUrl) + try { + new BufferedReader(new InputStreamReader(inputStream)).lines() + .collect(Collectors.joining("\n")) + } finally { + inputStream.close() + } + } } + } - schemaUrls.sorted.lastOption.map { schemaUrl => - val inputStream = classLoader.getResourceAsStream(schemaUrl) - try { - new BufferedReader(new InputStreamReader(inputStream)).lines() - .collect(Collectors.joining("\n")) - } finally { - inputStream.close() - } + def getSchemaVersion(schemaUrl: String): (Int, Int, Int) = + SCHEMA_URL_PATTERN.findFirstMatchIn(schemaUrl) match { + case Some(m) => (m.group(1).toInt, m.group(2).toInt, m.group(3).toInt) + case _ => throw new KyuubiException(s"Invalid schema url: $schemaUrl") } + + def getLatestSchemaUrl(schemaUrls: Seq[String]): Option[String] = { + schemaUrls.sortWith { (u1, u2) => + val v1 = getSchemaVersion(u1) + val v2 = getSchemaVersion(u2) + v1._1 > v2._1 || + (v1._1 == v2._1 && v1._2 > v2._2) || + (v1._1 == v2._1 && v1._2 == v2._2 && v1._3 > v2._3) + }.headOption } override def close(): Unit = { @@ -156,7 +168,7 @@ class JDBCMetadataStore(conf: KyuubiConf) extends MetadataStore with Logging { |VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) |""".stripMargin - withConnection() { connection => + JdbcUtils.withConnection { connection => execute( connection, query, @@ -186,7 +198,7 @@ class JDBCMetadataStore(conf: KyuubiConf) extends MetadataStore with Logging { s"SELECT $METADATA_ALL_COLUMNS FROM $METADATA_TABLE WHERE identifier = ?" } - withConnection() { connection => + JdbcUtils.withConnection { connection => withResultSet(connection, query, identifier) { rs => buildMetadata(rs, stateOnly).headOption.orNull } @@ -207,44 +219,44 @@ class JDBCMetadataStore(conf: KyuubiConf) extends MetadataStore with Logging { } val whereConditions = ListBuffer[String]() Option(filter.sessionType).foreach { sessionType => - whereConditions += " session_type = ?" + whereConditions += "session_type = ?" params += sessionType.toString } Option(filter.engineType).filter(_.nonEmpty).foreach { engineType => - whereConditions += " UPPER(engine_type) = ? " + whereConditions += "UPPER(engine_type) = ?" params += engineType.toUpperCase(Locale.ROOT) } Option(filter.username).filter(_.nonEmpty).foreach { username => - whereConditions += " user_name = ? " + whereConditions += "user_name = ?" params += username } Option(filter.state).filter(_.nonEmpty).foreach { state => - whereConditions += " state = ? " + whereConditions += "state = ?" params += state.toUpperCase(Locale.ROOT) } Option(filter.kyuubiInstance).filter(_.nonEmpty).foreach { kyuubiInstance => - whereConditions += " kyuubi_instance = ? " + whereConditions += "kyuubi_instance = ?" params += kyuubiInstance } if (filter.createTime > 0) { - whereConditions += " create_time >= ? " + whereConditions += "create_time >= ?" params += filter.createTime } if (filter.endTime > 0) { - whereConditions += " end_time > 0 " - whereConditions += " end_time <= ? " + whereConditions += "end_time > 0" + whereConditions += "end_time <= ?" params += filter.endTime } if (filter.peerInstanceClosed) { - whereConditions += " peer_instance_closed = ? " + whereConditions += "peer_instance_closed = ?" params += filter.peerInstanceClosed } if (whereConditions.nonEmpty) { - queryBuilder.append(whereConditions.mkString(" WHERE ", " AND ", " ")) + queryBuilder.append(whereConditions.mkString(" WHERE ", " AND ", "")) } - queryBuilder.append(" ORDER BY key_id ") + queryBuilder.append(" ORDER BY key_id") val query = databaseAdaptor.addLimitAndOffsetToQuery(queryBuilder.toString(), size, from) - withConnection() { connection => + JdbcUtils.withConnection { connection => withResultSet(connection, query, params: _*) { rs => buildMetadata(rs, stateOnly) } @@ -258,49 +270,49 @@ class JDBCMetadataStore(conf: KyuubiConf) extends MetadataStore with Logging { queryBuilder.append(s"UPDATE $METADATA_TABLE") val setClauses = ListBuffer[String]() Option(metadata.state).foreach { _ => - setClauses += " state = ? " + setClauses += "state = ?" params += metadata.state } if (metadata.endTime > 0) { - setClauses += " end_time = ? " + setClauses += "end_time = ?" params += metadata.endTime } if (metadata.engineOpenTime > 0) { - setClauses += " engine_open_time = ? " + setClauses += "engine_open_time = ?" params += metadata.engineOpenTime } Option(metadata.engineId).foreach { _ => - setClauses += " engine_id = ? " + setClauses += "engine_id = ?" params += metadata.engineId } Option(metadata.engineName).foreach { _ => - setClauses += " engine_name = ? " + setClauses += "engine_name = ?" params += metadata.engineName } Option(metadata.engineUrl).foreach { _ => - setClauses += " engine_url = ? " + setClauses += "engine_url = ?" params += metadata.engineUrl } Option(metadata.engineState).foreach { _ => - setClauses += " engine_state = ? " + setClauses += "engine_state = ?" params += metadata.engineState } metadata.engineError.foreach { error => - setClauses += " engine_error = ? " + setClauses += "engine_error = ?" params += error } if (metadata.peerInstanceClosed) { - setClauses += " peer_instance_closed = ? " + setClauses += "peer_instance_closed = ?" params += metadata.peerInstanceClosed } if (setClauses.nonEmpty) { - queryBuilder.append(setClauses.mkString(" SET ", " , ", " ")) + queryBuilder.append(setClauses.mkString(" SET ", ", ", "")) } - queryBuilder.append(" WHERE identifier = ? ") + queryBuilder.append(" WHERE identifier = ?") params += metadata.identifier val query = queryBuilder.toString() - withConnection() { connection => + JdbcUtils.withConnection { connection => withUpdateCount(connection, query, params: _*) { updateCount => if (updateCount == 0) { throw new KyuubiException( @@ -312,7 +324,7 @@ class JDBCMetadataStore(conf: KyuubiConf) extends MetadataStore with Logging { override def cleanupMetadataByIdentifier(identifier: String): Unit = { val query = s"DELETE FROM $METADATA_TABLE WHERE identifier = ?" - withConnection() { connection => + JdbcUtils.withConnection { connection => execute(connection, query, identifier) } } @@ -320,7 +332,7 @@ class JDBCMetadataStore(conf: KyuubiConf) extends MetadataStore with Logging { override def cleanupMetadataByAge(maxAge: Long): Unit = { val minEndTime = System.currentTimeMillis() - maxAge val query = s"DELETE FROM $METADATA_TABLE WHERE state IN ($terminalStates) AND end_time < ?" - withConnection() { connection => + JdbcUtils.withConnection { connection => execute(connection, query, minEndTime) } } @@ -391,7 +403,7 @@ class JDBCMetadataStore(conf: KyuubiConf) extends MetadataStore with Logging { } private def execute(conn: Connection, sql: String, params: Any*): Unit = { - debug(s"executing sql $sql") + debug(s"execute sql: $sql, with params: ${params.mkString(", ")}") var statement: PreparedStatement = null try { statement = conn.prepareStatement(sql) @@ -399,7 +411,9 @@ class JDBCMetadataStore(conf: KyuubiConf) extends MetadataStore with Logging { statement.execute() } catch { case e: SQLException => - throw new KyuubiException(s"Error executing $sql:" + e.getMessage, e) + throw new KyuubiException( + s"Error executing sql: $sql, with params: ${params.mkString(", ")}. ${e.getMessage}", + e) } finally { if (statement != null) { Utils.tryLogNonFatalError(statement.close()) @@ -411,7 +425,7 @@ class JDBCMetadataStore(conf: KyuubiConf) extends MetadataStore with Logging { conn: Connection, sql: String, params: Any*)(f: ResultSet => T): T = { - debug(s"executing sql $sql with result set") + debug(s"executeQuery sql: $sql, with params: ${params.mkString(", ")}") var statement: PreparedStatement = null var resultSet: ResultSet = null try { @@ -421,7 +435,9 @@ class JDBCMetadataStore(conf: KyuubiConf) extends MetadataStore with Logging { f(resultSet) } catch { case e: SQLException => - throw new KyuubiException(e.getMessage, e) + throw new KyuubiException( + s"Error executing sql: $sql, with params: ${params.mkString(", ")}. ${e.getMessage}", + e) } finally { if (resultSet != null) { Utils.tryLogNonFatalError(resultSet.close()) @@ -436,7 +452,7 @@ class JDBCMetadataStore(conf: KyuubiConf) extends MetadataStore with Logging { conn: Connection, sql: String, params: Any*)(f: Int => T): T = { - debug(s"executing sql $sql with update count") + debug(s"executeUpdate sql: $sql, with params: ${params.mkString(", ")}") var statement: PreparedStatement = null try { statement = conn.prepareStatement(sql) @@ -444,7 +460,9 @@ class JDBCMetadataStore(conf: KyuubiConf) extends MetadataStore with Logging { f(statement.executeUpdate()) } catch { case e: SQLException => - throw new KyuubiException(e.getMessage, e) + throw new KyuubiException( + s"Error executing sql: $sql, with params: ${params.mkString(", ")}. ${e.getMessage}", + e) } finally { if (statement != null) { Utils.tryLogNonFatalError(statement.close()) @@ -467,22 +485,6 @@ class JDBCMetadataStore(conf: KyuubiConf) extends MetadataStore with Logging { } } - private def withConnection[T](autoCommit: Boolean = true)(f: Connection => T): T = { - var connection: Connection = null - try { - connection = hikariDataSource.getConnection - connection.setAutoCommit(autoCommit) - f(connection) - } catch { - case e: SQLException => - throw new KyuubiException(e.getMessage, e) - } finally { - if (connection != null) { - Utils.tryLogNonFatalError(connection.close()) - } - } - } - private def valueAsString(obj: Any): String = { mapper.writeValueAsString(obj) } @@ -505,6 +507,7 @@ class JDBCMetadataStore(conf: KyuubiConf) extends MetadataStore with Logging { } object JDBCMetadataStore { + private val SCHEMA_URL_PATTERN = """^metadata-store-schema-(\d+)\.(\d+)\.(\d+)\.(.*)\.sql$""".r private val METADATA_TABLE = "metadata" private val METADATA_STATE_ONLY_COLUMNS = Seq( "identifier", diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/metadata/jdbc/JDBCMetadataStoreConf.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/metadata/jdbc/JDBCMetadataStoreConf.scala index 27b9bc58e11..de30b6e6689 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/metadata/jdbc/JDBCMetadataStoreConf.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/metadata/jdbc/JDBCMetadataStoreConf.scala @@ -19,13 +19,12 @@ package org.apache.kyuubi.server.metadata.jdbc import java.util.{Locale, Properties} -import org.apache.kyuubi.config.{ConfigBuilder, ConfigEntry, KyuubiConf, OptionalConfigEntry} +import org.apache.kyuubi.config.{ConfigEntry, KyuubiConf, OptionalConfigEntry} +import org.apache.kyuubi.config.KyuubiConf.buildConf object JDBCMetadataStoreConf { final val METADATA_STORE_JDBC_DATASOURCE_PREFIX = "kyuubi.metadata.store.jdbc.datasource" - private def buildConf(key: String): ConfigBuilder = KyuubiConf.buildConf(key) - /** Get metadata store jdbc datasource properties. */ def getMetadataStoreJDBCDataSourceProperties(conf: KyuubiConf): Properties = { val datasourceProperties = new Properties() @@ -38,11 +37,11 @@ object JDBCMetadataStoreConf { val METADATA_STORE_JDBC_DATABASE_TYPE: ConfigEntry[String] = buildConf("kyuubi.metadata.store.jdbc.database.type") .doc("The database type for server jdbc metadata store.
                " + - "
              • DERBY: Apache Derby, jdbc driver `org.apache.derby.jdbc.AutoloadedDriver`.
              • " + - "
              • MYSQL: MySQL, jdbc driver `com.mysql.jdbc.Driver`.
              • " + - "
              • CUSTOM: User-defined database type, need to specify corresponding jdbc driver.
              • " + - " Note that: The jdbc datasource is powered by HiKariCP, for datasource properties," + - " please specify them with prefix: kyuubi.metadata.store.jdbc.datasource." + + "
              • DERBY: Apache Derby, JDBC driver `org.apache.derby.jdbc.AutoloadedDriver`.
              • " + + "
              • MYSQL: MySQL, JDBC driver `com.mysql.jdbc.Driver`.
              • " + + "
              • CUSTOM: User-defined database type, need to specify corresponding JDBC driver.
              • " + + " Note that: The JDBC datasource is powered by HiKariCP, for datasource properties," + + " please specify them with the prefix: kyuubi.metadata.store.jdbc.datasource." + " For example, kyuubi.metadata.store.jdbc.datasource.connectionTimeout=10000.") .version("1.6.0") .serverOnly @@ -52,7 +51,7 @@ object JDBCMetadataStoreConf { val METADATA_STORE_JDBC_DATABASE_SCHEMA_INIT: ConfigEntry[Boolean] = buildConf("kyuubi.metadata.store.jdbc.database.schema.init") - .doc("Whether to init the jdbc metadata store database schema.") + .doc("Whether to init the JDBC metadata store database schema.") .version("1.6.0") .serverOnly .booleanConf @@ -68,9 +67,10 @@ object JDBCMetadataStoreConf { val METADATA_STORE_JDBC_URL: ConfigEntry[String] = buildConf("kyuubi.metadata.store.jdbc.url") - .doc("The jdbc url for server jdbc metadata store. By defaults, it is a DERBY in-memory" + + .doc("The JDBC url for server JDBC metadata store. By default, it is a DERBY in-memory" + " database url, and the state information is not shared across kyuubi instances. To" + - " enable multiple kyuubi instances high available, please specify a production jdbc url.") + " enable high availability for multiple kyuubi instances," + + " please specify a production JDBC url.") .version("1.6.0") .serverOnly .stringConf @@ -78,7 +78,7 @@ object JDBCMetadataStoreConf { val METADATA_STORE_JDBC_USER: ConfigEntry[String] = buildConf("kyuubi.metadata.store.jdbc.user") - .doc("The username for server jdbc metadata store.") + .doc("The username for server JDBC metadata store.") .version("1.6.0") .serverOnly .stringConf @@ -86,7 +86,7 @@ object JDBCMetadataStoreConf { val METADATA_STORE_JDBC_PASSWORD: ConfigEntry[String] = buildConf("kyuubi.metadata.store.jdbc.password") - .doc("The password for server jdbc metadata store.") + .doc("The password for server JDBC metadata store.") .version("1.6.0") .serverOnly .stringConf diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/trino/api/KyuubiTrinoOperationTranslator.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/trino/api/KyuubiTrinoOperationTranslator.scala index 6ec9fc1c80e..c78cb351edf 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/trino/api/KyuubiTrinoOperationTranslator.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/trino/api/KyuubiTrinoOperationTranslator.scala @@ -19,30 +19,22 @@ package org.apache.kyuubi.server.trino.api import scala.collection.JavaConverters._ -import org.apache.hive.service.rpc.thrift.TProtocolVersion - import org.apache.kyuubi.operation.OperationHandle import org.apache.kyuubi.service.BackendService +import org.apache.kyuubi.session.SessionHandle import org.apache.kyuubi.sql.parser.trino.KyuubiTrinoFeParser import org.apache.kyuubi.sql.plan.PassThroughNode -import org.apache.kyuubi.sql.plan.trino.{GetCatalogs, GetColumns, GetSchemas, GetTables, GetTableTypes, GetTypeInfo} +import org.apache.kyuubi.sql.plan.trino.{GetCatalogs, GetColumns, GetPrimaryKeys, GetSchemas, GetTables, GetTableTypes, GetTypeInfo} class KyuubiTrinoOperationTranslator(backendService: BackendService) { lazy val parser = new KyuubiTrinoFeParser() def transform( statement: String, - user: String, - ipAddress: String, + sessionHandle: SessionHandle, configs: Map[String, String], runAsync: Boolean, queryTimeout: Long): OperationHandle = { - val sessionHandle = backendService.openSession( - TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V11, - user, - "", - ipAddress, - configs) parser.parsePlan(statement) match { case GetSchemas(catalogName, schemaPattern) => backendService.getSchemas(sessionHandle, catalogName, schemaPattern) @@ -68,6 +60,11 @@ class KyuubiTrinoOperationTranslator(backendService: BackendService) { schemaPattern, tableNamePattern, colNamePattern) + case GetPrimaryKeys() => + val operationHandle = backendService.getPrimaryKeys(sessionHandle, null, null, null) + // The trino implementation always returns empty. + operationHandle.setHasResultSet(false) + operationHandle case PassThroughNode() => backendService.executeStatement(sessionHandle, statement, configs, runAsync, queryTimeout) } diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/trino/api/Query.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/trino/api/Query.scala new file mode 100644 index 00000000000..4e768b04a41 --- /dev/null +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/trino/api/Query.scala @@ -0,0 +1,288 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.server.trino.api + +import java.net.URI +import java.security.SecureRandom +import java.util.Objects.requireNonNull +import java.util.UUID +import java.util.concurrent.atomic.AtomicLong +import javax.ws.rs.WebApplicationException +import javax.ws.rs.core.{Response, UriInfo} + +import scala.collection.mutable + +import Slug.Context.{EXECUTING_QUERY, QUEUED_QUERY} +import com.google.common.hash.Hashing +import io.trino.client.QueryResults +import org.apache.hive.service.rpc.thrift.{TBoolValue, TColumnDesc, TColumnValue, TGetResultSetMetadataResp, TPrimitiveTypeEntry, TProtocolVersion, TRow, TRowSet, TTableSchema, TTypeDesc, TTypeEntry, TTypeId} + +import org.apache.kyuubi.operation.{FetchOrientation, OperationHandle, OperationState, OperationStatus} +import org.apache.kyuubi.operation.OperationState.{FINISHED, INITIALIZED, OperationState, PENDING} +import org.apache.kyuubi.server.trino.api.Query.KYUUBI_SESSION_ID +import org.apache.kyuubi.service.BackendService +import org.apache.kyuubi.service.TFrontendService.OK_STATUS +import org.apache.kyuubi.session.SessionHandle + +case class Query( + queryId: QueryId, + context: TrinoContext, + be: BackendService) { + + private val QUEUED_QUERY_PATH = "/v1/statement/queued/" + private val EXECUTING_QUERY_PATH = "/v1/statement/executing" + + private val slug: Slug = Slug.createNewWithUUID(queryId.getQueryId) + private val lastToken = new AtomicLong + + private val defaultMaxRows = 1000 + private val defaultFetchOrientation = FetchOrientation.withName("FETCH_NEXT") + + def getQueryResults(token: Long, uriInfo: UriInfo, maxWait: Long = 0): QueryResults = { + val status = + be.getOperationStatus(queryId.operationHandle, Some(maxWait)) + val nextUri = if (status.exception.isEmpty) { + getNextUri(token + 1, uriInfo, toSlugContext(status.state)) + } else null + val queryHtmlUri = uriInfo.getRequestUriBuilder + .replacePath("ui/query.html").replaceQuery(queryId.getQueryId).build() + + status.state match { + case FINISHED => + val metaData = be.getResultSetMetadata(queryId.operationHandle) + val resultSet = be.fetchResults( + queryId.operationHandle, + defaultFetchOrientation, + defaultMaxRows, + false) + TrinoContext.createQueryResults( + queryId.getQueryId, + nextUri, + queryHtmlUri, + status, + Option(metaData), + Option(resultSet)) + case _ => + TrinoContext.createQueryResults( + queryId.getQueryId, + nextUri, + queryHtmlUri, + status) + } + } + + def getPrepareQueryResults( + token: Long, + uriInfo: UriInfo, + maxWait: Long = 0): QueryResults = { + val status = OperationStatus(OperationState.FINISHED, 0, 0, 0, 0, false) + val nextUri = null + val queryHtmlUri = uriInfo.getRequestUriBuilder + .replacePath("ui/query.html").replaceQuery(queryId.getQueryId).build() + + val columns = new TGetResultSetMetadataResp() + columns.setStatus(OK_STATUS) + val tColumnDesc = new TColumnDesc() + tColumnDesc.setColumnName("result") + val desc = new TTypeDesc + desc.addToTypes(TTypeEntry.primitiveEntry(new TPrimitiveTypeEntry(TTypeId.BOOLEAN_TYPE))) + tColumnDesc.setTypeDesc(desc) + tColumnDesc.setPosition(0) + val schema = new TTableSchema() + schema.addToColumns(tColumnDesc) + columns.setSchema(schema) + + val rows = new java.util.ArrayList[TRow] + val trow = new TRow() + val value = new TBoolValue() + value.setValue(true) + trow.addToColVals(TColumnValue.boolVal(value)) + rows.add(trow) + val rowSet = new TRowSet(0, rows) + + TrinoContext.createQueryResults( + queryId.getQueryId, + nextUri, + queryHtmlUri, + status, + Option(columns), + Option(rowSet), + updateType = "PREPARE") + } + + def getLastToken: Long = this.lastToken.get() + + def getSlug: Slug = this.slug + + def cancel: Unit = clear + + private def clear = { + be.closeOperation(queryId.operationHandle) + context.session.get(KYUUBI_SESSION_ID).foreach { id => + be.closeSession(SessionHandle.fromUUID(id)) + } + } + + private def setToken(token: Long): Unit = { + val lastToken = this.lastToken.get + if (token != lastToken && token != lastToken + 1) { + throw new WebApplicationException(Response.Status.GONE) + } + this.lastToken.compareAndSet(lastToken, token) + } + + private def getNextUri(token: Long, uriInfo: UriInfo, slugContext: Slug.Context.Context): URI = { + val path = slugContext match { + case QUEUED_QUERY => QUEUED_QUERY_PATH + case EXECUTING_QUERY => EXECUTING_QUERY_PATH + } + + uriInfo.getBaseUriBuilder.replacePath(path) + .path(queryId.getQueryId) + .path(slug.makeSlug(slugContext, token)) + .path(String.valueOf(token)) + .replaceQuery("") + .build() + } + + private def toSlugContext(state: OperationState): Slug.Context.Context = { + state match { + case INITIALIZED | PENDING => Slug.Context.QUEUED_QUERY + case _ => Slug.Context.EXECUTING_QUERY + } + } + +} + +object Query { + + val KYUUBI_SESSION_ID = "kyuubi.session.id" + + def apply( + statement: String, + context: TrinoContext, + translator: KyuubiTrinoOperationTranslator, + backendService: BackendService, + queryTimeout: Long = 0): Query = { + val sessionHandle = getOrCreateSession(context, backendService) + val operationHandle = translator.transform( + statement, + sessionHandle, + context.session, + true, + queryTimeout) + val sessionWithId = + context.session + (KYUUBI_SESSION_ID -> sessionHandle.identifier.toString) + val updatedContext = context.copy(session = sessionWithId) + Query(QueryId(operationHandle), updatedContext, backendService) + } + + def apply( + statementId: String, + statement: String, + context: TrinoContext, + backendService: BackendService): Query = { + val sessionHandle = getOrCreateSession(context, backendService) + val sessionWithId = + context.session + (KYUUBI_SESSION_ID -> sessionHandle.identifier.toString) + Query( + queryId = QueryId(new OperationHandle(UUID.randomUUID())), + context.copy(preparedStatement = Map(statementId -> statement), session = sessionWithId), + backendService) + } + + def apply(id: String, context: TrinoContext, backendService: BackendService): Query = { + Query(QueryId(id), context, backendService) + } + + private def getOrCreateSession( + context: TrinoContext, + backendService: BackendService): SessionHandle = { + context.session.get(KYUUBI_SESSION_ID).map(SessionHandle.fromUUID).getOrElse { + // transform Trino information to session and engine as far as possible. + val trinoInfo = new mutable.HashMap[String, String]() + context.clientInfo.foreach { info => + trinoInfo.put("trino.client.info", info) + } + context.source.foreach { source => + trinoInfo.put("trino.request.source", source) + } + context.traceToken.foreach { traceToken => + trinoInfo.put("trino.trace.token", traceToken) + } + context.timeZone.foreach { timeZone => + trinoInfo.put("trino.time.zone", timeZone) + } + context.language.foreach { language => + trinoInfo.put("trino.language", language) + } + if (context.clientTags.nonEmpty) { + trinoInfo.put("trino.client.info", context.clientTags.mkString(",")) + } + + val newSessionConfigs = context.session ++ trinoInfo + backendService.openSession( + TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V11, + context.user, + "", + context.remoteUserAddress.getOrElse(""), + newSessionConfigs) + } + } + +} + +case class QueryId(operationHandle: OperationHandle) { + def getQueryId: String = operationHandle.identifier.toString +} + +object QueryId { + def apply(id: String): QueryId = QueryId(OperationHandle(id)) +} + +object Slug { + + object Context extends Enumeration { + type Context = Value + val QUEUED_QUERY, EXECUTING_QUERY = Value + } + + private val RANDOM = new SecureRandom + + def createNew: Slug = { + val randomBytes = new Array[Byte](16) + RANDOM.nextBytes(randomBytes) + new Slug(randomBytes) + } + + def createNewWithUUID(uuid: String): Slug = { + val uuidBytes = UUID.fromString(uuid).toString.getBytes("UTF-8") + new Slug(uuidBytes) + } +} + +case class Slug(slugKey: Array[Byte]) { + val hmac = Hashing.hmacSha1(requireNonNull(slugKey, "slugKey is null")) + + def makeSlug(context: Slug.Context.Context, token: Long): String = { + "y" + hmac.newHasher.putInt(context.id).putLong(token).hash.toString + } + + def isValid(context: Slug.Context.Context, slug: String, token: Long): Boolean = + makeSlug(context, token) == slug +} diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/trino/api/TrinoContext.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/trino/api/TrinoContext.scala index 8f3131f61c9..16fc0388a2c 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/trino/api/TrinoContext.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/trino/api/TrinoContext.scala @@ -18,34 +18,43 @@ package org.apache.kyuubi.server.trino.api import java.io.UnsupportedEncodingException -import java.net.{URLDecoder, URLEncoder} +import java.net.{URI, URLDecoder, URLEncoder} +import java.util +import java.util.Optional import javax.ws.rs.core.{HttpHeaders, Response} import scala.collection.JavaConverters._ +import com.google.common.collect.ImmutableList +import io.trino.client.{ClientStandardTypes, ClientTypeSignature, ClientTypeSignatureParameter, Column, NamedClientTypeSignature, QueryError, QueryResults, RowFieldName, StatementStats, Warning} import io.trino.client.ProtocolHeaders.TRINO_HEADERS -import io.trino.client.QueryResults +import org.apache.hive.service.rpc.thrift.{TCLIServiceConstants, TGetResultSetMetadataResp, TRowSet, TTypeEntry, TTypeId} +import org.apache.kyuubi.operation.OperationState.FINISHED +import org.apache.kyuubi.operation.OperationStatus +import org.apache.kyuubi.server.trino.api.Query.KYUUBI_SESSION_ID + +// TODO: Support replace `preparedStatement` for Trino-jdbc /** * The description and functionality of trino request * and response's context * - * @param user Specifies the session user, must be supplied with every query - * @param timeZone The timezone for query processing + * @param user Specifies the session user, must be supplied with every query + * @param timeZone The timezone for query processing * @param clientCapabilities Exclusive for trino server - * @param source This supplies the name of the software that submitted the query, - * e.g. `trino-jdbc` or `trino-cli` by default - * @param catalog The catalog context for query processing, will be set response - * @param schema The schema context for query processing - * @param language The language to use when processing the query and formatting results, - * formatted as a Java Locale string, e.g., en-US for US English - * @param traceToken Trace token for correlating requests across systems - * @param clientInfo Extra information about the client - * @param clientTags Client tags for selecting resource groups. Example: abc,xyz - * @param preparedStatement `preparedStatement` are kv pairs, where the names - * are names of previously prepared SQL statements, - * and the values are keys that identify the - * executable form of the named prepared statements + * @param source This supplies the name of the software that submitted the query, + * e.g. `trino-jdbc` or `trino-cli` by default + * @param catalog The catalog context for query processing, will be set response + * @param schema The schema context for query processing + * @param language The language to use when processing the query and formatting results, + * formatted as a Java Locale string, e.g., en-US for US English + * @param traceToken Trace token for correlating requests across systems + * @param clientInfo Extra information about the client + * @param clientTags Client tags for selecting resource groups. Example: abc,xyz + * @param preparedStatement `preparedStatement` are kv pairs, where the names + * are names of previously prepared SQL statements, + * and the values are keys that identify the + * executable form of the named prepared statements */ case class TrinoContext( user: String, @@ -54,6 +63,7 @@ case class TrinoContext( source: Option[String] = None, catalog: Option[String] = None, schema: Option[String] = None, + remoteUserAddress: Option[String] = None, language: Option[String] = None, traceToken: Option[String] = None, clientInfo: Option[String] = None, @@ -63,10 +73,16 @@ case class TrinoContext( object TrinoContext { - def apply(headers: HttpHeaders): TrinoContext = { - apply(headers.getRequestHeaders.asScala.toMap.map { + private val defaultWarning: util.List[Warning] = new util.ArrayList[Warning]() + private val GENERIC_INTERNAL_ERROR_CODE = 65536 + private val GENERIC_INTERNAL_ERROR_NAME = "GENERIC_INTERNAL_ERROR_NAME" + private val GENERIC_INTERNAL_ERROR_TYPE = "INTERNAL_ERROR" + + def apply(headers: HttpHeaders, remoteAddress: Option[String]): TrinoContext = { + val context = apply(headers.getRequestHeaders.asScala.toMap.map { case (k, v) => (k, v.asScala.toList) }) + context.copy(remoteUserAddress = remoteAddress) } def apply(headers: Map[String, List[String]]): TrinoContext = { @@ -125,19 +141,20 @@ object TrinoContext { } } - // TODO: Building response with TrinoContext and other information def buildTrinoResponse(qr: QueryResults, trinoContext: TrinoContext): Response = { val responseBuilder = Response.ok(qr) - trinoContext.catalog.foreach( - responseBuilder.header(TRINO_HEADERS.responseSetCatalog, _)) - trinoContext.schema.foreach( - responseBuilder.header(TRINO_HEADERS.responseSetSchema, _)) + // Note, We have injected kyuubi session id to session context so that the next query can find + // the previous session to restore the query context. + // It's hard to follow the Trino style that set all context to http headers. + // Because we do not know the context at server side. e.g. `set k=v`, `use database`. + // We also can not inject other session context into header before we supporting to map + // query result to session context. + require(trinoContext.session.contains(KYUUBI_SESSION_ID), s"$KYUUBI_SESSION_ID must be set.") + responseBuilder.header( + TRINO_HEADERS.responseSetSession, + s"$KYUUBI_SESSION_ID=${urlEncode(trinoContext.session(KYUUBI_SESSION_ID))}") - trinoContext.session.foreach { - case (k, v) => - responseBuilder.header(TRINO_HEADERS.responseSetSession, s"${k}=${urlEncode(v)}") - } trinoContext.preparedStatement.foreach { case (k, v) => responseBuilder.header(TRINO_HEADERS.responseAddedPrepare, s"${k}=${urlEncode(v)}") @@ -147,8 +164,6 @@ object TrinoContext { responseBuilder.header(TRINO_HEADERS.responseDeallocatedPrepare, urlEncode(v)) } - responseBuilder.header(TRINO_HEADERS.responseClearSession, s"responseClearSession") - responseBuilder.header(TRINO_HEADERS.responseClearTransactionId, "false") responseBuilder.build() } @@ -166,4 +181,285 @@ object TrinoContext { throw new AssertionError(e) } + def createQueryResults( + queryId: String, + nextUri: URI, + queryHtmlUri: URI, + queryStatus: OperationStatus, + columns: Option[TGetResultSetMetadataResp] = None, + data: Option[TRowSet] = None, + updateType: String = null): QueryResults = { + + val columnList = columns match { + case Some(value) => convertTColumn(value) + case None => null + } + val rowList = data match { + case Some(value) => + Option(updateType) match { + case Some("PREPARE") => + ImmutableList.of(ImmutableList.of(true).asInstanceOf[util.List[Object]]) + case _ => convertTRowSet(value) + } + case None => null + } + + val updatedNextUri = queryStatus.state match { + case FINISHED if rowList == null || rowList.isEmpty || rowList.get(0).isEmpty => null + case _ => nextUri + } + + new QueryResults( + queryId, + queryHtmlUri, + nextUri, + updatedNextUri, + columnList, + rowList, + StatementStats.builder.setState(queryStatus.state.name()).setQueued(false) + .setElapsedTimeMillis(0).setQueuedTimeMillis(0).build(), + toQueryError(queryStatus), + defaultWarning, + updateType, + 0L) + } + + private def convertTColumn(columns: TGetResultSetMetadataResp): util.List[Column] = { + columns.getSchema.getColumns.asScala.map(c => { + val (tp, arguments) = toClientTypeSignature(c.getTypeDesc.getTypes.get(0)) + new Column(c.getColumnName, tp, new ClientTypeSignature(tp, arguments)) + }).toList.asJava + } + + private def toClientTypeSignature( + entry: TTypeEntry): (String, util.List[ClientTypeSignatureParameter]) = { + // according to `io.trino.jdbc.ColumnInfo` + if (entry.isSetPrimitiveEntry) { + entry.getPrimitiveEntry.getType match { + case TTypeId.BOOLEAN_TYPE => + (ClientStandardTypes.BOOLEAN, ImmutableList.of[ClientTypeSignatureParameter]) + case TTypeId.TINYINT_TYPE => + (ClientStandardTypes.TINYINT, ImmutableList.of[ClientTypeSignatureParameter]) + case TTypeId.SMALLINT_TYPE => + (ClientStandardTypes.SMALLINT, ImmutableList.of[ClientTypeSignatureParameter]) + case TTypeId.INT_TYPE => + (ClientStandardTypes.INTEGER, ImmutableList.of[ClientTypeSignatureParameter]) + case TTypeId.BIGINT_TYPE => + (ClientStandardTypes.BIGINT, ImmutableList.of[ClientTypeSignatureParameter]) + case TTypeId.FLOAT_TYPE => + (ClientStandardTypes.DOUBLE, ImmutableList.of[ClientTypeSignatureParameter]) + case TTypeId.DOUBLE_TYPE => + (ClientStandardTypes.DOUBLE, ImmutableList.of[ClientTypeSignatureParameter]) + case TTypeId.DATE_TYPE => + (ClientStandardTypes.DATE, ImmutableList.of[ClientTypeSignatureParameter]) + case TTypeId.TIMESTAMP_TYPE => + (ClientStandardTypes.TIMESTAMP, ImmutableList.of[ClientTypeSignatureParameter]) + case TTypeId.BINARY_TYPE => + (ClientStandardTypes.VARBINARY, ImmutableList.of[ClientTypeSignatureParameter]) + case TTypeId.DECIMAL_TYPE => + val map = entry.getPrimitiveEntry.getTypeQualifiers.getQualifiers + val precision = Option(map.get(TCLIServiceConstants.PRECISION)).map(_.getI32Value) + .getOrElse(38) + val scale = Option(map.get(TCLIServiceConstants.SCALE)).map(_.getI32Value) + .getOrElse(18) + ( + ClientStandardTypes.DECIMAL, + ImmutableList.of( + ClientTypeSignatureParameter.ofLong(precision), + ClientTypeSignatureParameter.ofLong(scale))) + case TTypeId.STRING_TYPE => + ( + ClientStandardTypes.VARCHAR, + varcharSignatureParameter) + case TTypeId.VARCHAR_TYPE => + ( + ClientStandardTypes.VARCHAR, + varcharSignatureParameter) + case TTypeId.CHAR_TYPE => + (ClientStandardTypes.CHAR, ImmutableList.of(ClientTypeSignatureParameter.ofLong(65536))) + case TTypeId.INTERVAL_YEAR_MONTH_TYPE => + ( + ClientStandardTypes.INTERVAL_YEAR_TO_MONTH, + ImmutableList.of[ClientTypeSignatureParameter]) + case TTypeId.INTERVAL_DAY_TIME_TYPE => + (ClientStandardTypes.TIME_WITH_TIME_ZONE, ImmutableList.of[ClientTypeSignatureParameter]) + case TTypeId.TIMESTAMPLOCALTZ_TYPE => + ( + ClientStandardTypes.TIMESTAMP_WITH_TIME_ZONE, + ImmutableList.of[ClientTypeSignatureParameter]) + case _ => + ( + ClientStandardTypes.VARCHAR, + varcharSignatureParameter) + } + } else if (entry.isSetArrayEntry) { + // thrift does not support nested types. + // it's quite hard to follow the hive way, so always return varchar + // TODO: make complex data type more accurate + ( + ClientStandardTypes.ARRAY, + ImmutableList.of(ClientTypeSignatureParameter.ofType( + new ClientTypeSignature(ClientStandardTypes.VARCHAR, varcharSignatureParameter)))) + } else if (entry.isSetMapEntry) { + ( + ClientStandardTypes.MAP, + ImmutableList.of( + ClientTypeSignatureParameter.ofType( + new ClientTypeSignature(ClientStandardTypes.VARCHAR, varcharSignatureParameter)), + ClientTypeSignatureParameter.ofType( + new ClientTypeSignature(ClientStandardTypes.VARCHAR, varcharSignatureParameter)))) + } else if (entry.isSetStructEntry) { + val parameters = entry.getStructEntry.getNameToTypePtr.asScala.map { case (k, v) => + ClientTypeSignatureParameter.ofNamedType( + new NamedClientTypeSignature( + Optional.of(new RowFieldName(k)), + new ClientTypeSignature(ClientStandardTypes.VARCHAR, varcharSignatureParameter))) + } + ( + ClientStandardTypes.ROW, + ImmutableList.copyOf(parameters.toArray)) + } else { + throw new UnsupportedOperationException(s"Do not support type: $entry") + } + } + + private def varcharSignatureParameter: util.List[ClientTypeSignatureParameter] = { + ImmutableList.of(ClientTypeSignatureParameter.ofLong( + ClientTypeSignature.VARCHAR_UNBOUNDED_LENGTH)) + } + + def convertTRowSet(rowSet: TRowSet): util.List[util.List[Object]] = { + val dataResult = new util.LinkedList[util.List[Object]] + + if (rowSet.getColumns == null) { + return rowSet.getRows.asScala + .map(t => t.getColVals.asScala.map(v => v.getFieldValue.asInstanceOf[Object]).asJava) + .asJava + } + + rowSet.getColumns.asScala.foreach { + case tColumn if tColumn.isSetBoolVal => + val nulls = util.BitSet.valueOf(tColumn.getBoolVal.getNulls) + if (dataResult.isEmpty) { + (1 to tColumn.getBoolVal.getValuesSize).foreach(_ => + dataResult.add(new util.LinkedList[Object]())) + } + + tColumn.getBoolVal.getValues.asScala.zipWithIndex.foreach { + case (_, rowIdx) if nulls.get(rowIdx) => + dataResult.get(rowIdx).add(null) + case (v, rowIdx) => + dataResult.get(rowIdx).add(v) + } + case tColumn if tColumn.isSetByteVal => + val nulls = util.BitSet.valueOf(tColumn.getByteVal.getNulls) + if (dataResult.isEmpty) { + (1 to tColumn.getByteVal.getValuesSize).foreach(_ => + dataResult.add(new util.LinkedList[Object]())) + } + + tColumn.getByteVal.getValues.asScala.zipWithIndex.foreach { + case (_, rowIdx) if nulls.get(rowIdx) => + dataResult.get(rowIdx).add(null) + case (v, rowIdx) => + dataResult.get(rowIdx).add(v) + } + case tColumn if tColumn.isSetI16Val => + val nulls = util.BitSet.valueOf(tColumn.getI16Val.getNulls) + if (dataResult.isEmpty) { + (1 to tColumn.getI16Val.getValuesSize).foreach(_ => + dataResult.add(new util.LinkedList[Object]())) + } + + tColumn.getI16Val.getValues.asScala.zipWithIndex.foreach { + case (_, rowIdx) if nulls.get(rowIdx) => + dataResult.get(rowIdx).add(null) + case (v, rowIdx) => + dataResult.get(rowIdx).add(v) + } + case tColumn if tColumn.isSetI32Val => + val nulls = util.BitSet.valueOf(tColumn.getI32Val.getNulls) + if (dataResult.isEmpty) { + (1 to tColumn.getI32Val.getValuesSize).foreach(_ => + dataResult.add(new util.LinkedList[Object]())) + } + + tColumn.getI32Val.getValues.asScala.zipWithIndex.foreach { + case (_, rowIdx) if nulls.get(rowIdx) => + dataResult.get(rowIdx).add(null) + case (v, rowIdx) => + dataResult.get(rowIdx).add(v) + } + case tColumn if tColumn.isSetI64Val => + val nulls = util.BitSet.valueOf(tColumn.getI64Val.getNulls) + if (dataResult.isEmpty) { + (1 to tColumn.getI64Val.getValuesSize).foreach(_ => + dataResult.add(new util.LinkedList[Object]())) + } + + tColumn.getI64Val.getValues.asScala.zipWithIndex.foreach { + case (_, rowIdx) if nulls.get(rowIdx) => + dataResult.get(rowIdx).add(null) + case (v, rowIdx) => + dataResult.get(rowIdx).add(v) + } + case tColumn if tColumn.isSetDoubleVal => + val nulls = util.BitSet.valueOf(tColumn.getDoubleVal.getNulls) + if (dataResult.isEmpty) { + (1 to tColumn.getDoubleVal.getValuesSize).foreach(_ => + dataResult.add(new util.LinkedList[Object]())) + } + + tColumn.getDoubleVal.getValues.asScala.zipWithIndex.foreach { + case (_, rowIdx) if nulls.get(rowIdx) => + dataResult.get(rowIdx).add(null) + case (v, rowIdx) => + dataResult.get(rowIdx).add(v) + } + case tColumn if tColumn.isSetBinaryVal => + val nulls = util.BitSet.valueOf(tColumn.getBinaryVal.getNulls) + if (dataResult.isEmpty) { + (1 to tColumn.getBinaryVal.getValuesSize).foreach(_ => + dataResult.add(new util.LinkedList[Object]())) + } + + tColumn.getBinaryVal.getValues.asScala.zipWithIndex.foreach { + case (_, rowIdx) if nulls.get(rowIdx) => + dataResult.get(rowIdx).add(null) + case (v, rowIdx) => + dataResult.get(rowIdx).add(v) + } + case tColumn => + val nulls = util.BitSet.valueOf(tColumn.getStringVal.getNulls) + if (dataResult.isEmpty) { + (1 to tColumn.getStringVal.getValuesSize).foreach(_ => + dataResult.add(new util.LinkedList[Object]())) + } + + tColumn.getStringVal.getValues.asScala.zipWithIndex.foreach { + case (_, rowIdx) if nulls.get(rowIdx) => + dataResult.get(rowIdx).add(null) + case (v, rowIdx) => + dataResult.get(rowIdx).add(v) + } + } + dataResult + } + + def toQueryError(queryStatus: OperationStatus): QueryError = { + val exception = queryStatus.exception + if (exception.isEmpty) { + null + } else { + new QueryError( + exception.get.getMessage, + queryStatus.state.name(), + GENERIC_INTERNAL_ERROR_CODE, + GENERIC_INTERNAL_ERROR_NAME, + GENERIC_INTERNAL_ERROR_TYPE, + null, + null) + } + } + } diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/trino/api/TrinoScalaObjectMapper.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/trino/api/TrinoScalaObjectMapper.scala new file mode 100644 index 00000000000..33091e33878 --- /dev/null +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/trino/api/TrinoScalaObjectMapper.scala @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.server.trino.api + +import javax.ws.rs.ext.ContextResolver + +import com.fasterxml.jackson.databind.{DeserializationFeature, MapperFeature, ObjectMapper} +import com.fasterxml.jackson.datatype.jdk8.Jdk8Module + +class TrinoScalaObjectMapper extends ContextResolver[ObjectMapper] { + + // refer `io.trino.client.JsonCodec` + private lazy val mapper = new ObjectMapper() + .disable(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES) + .disable(MapperFeature.AUTO_DETECT_CREATORS) + .disable(MapperFeature.AUTO_DETECT_FIELDS) + .disable(MapperFeature.AUTO_DETECT_SETTERS) + .disable(MapperFeature.AUTO_DETECT_GETTERS) + .disable(MapperFeature.AUTO_DETECT_IS_GETTERS) + .disable(MapperFeature.USE_GETTERS_AS_SETTERS) + .disable(MapperFeature.CAN_OVERRIDE_ACCESS_MODIFIERS) + .disable(MapperFeature.INFER_PROPERTY_MUTATORS) + .disable(MapperFeature.ALLOW_FINAL_FIELDS_AS_MUTATORS) + .registerModule(new Jdk8Module) + + override def getContext(aClass: Class[_]): ObjectMapper = mapper +} diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/trino/api/TrinoServerConfig.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/trino/api/TrinoServerConfig.scala index d1f7de336ba..298e60c9cac 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/trino/api/TrinoServerConfig.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/trino/api/TrinoServerConfig.scala @@ -21,6 +21,6 @@ import org.glassfish.jersey.server.ResourceConfig class TrinoServerConfig extends ResourceConfig { packages("org.apache.kyuubi.server.trino.api.v1") - register(classOf[KyuubiScalaObjectMapper]) + register(classOf[TrinoScalaObjectMapper]) register(classOf[RestExceptionMapper]) } diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/trino/api/v1/ApiRootResource.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/trino/api/v1/ApiRootResource.scala index fa023637800..c703d1e20bf 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/trino/api/v1/ApiRootResource.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/trino/api/v1/ApiRootResource.scala @@ -37,8 +37,7 @@ private[v1] class ApiRootResource extends ApiRequestContext { @ApiResponse( responseCode = "200", - content = Array(new Content( - mediaType = MediaType.APPLICATION_JSON)), + content = Array(new Content(mediaType = MediaType.APPLICATION_JSON)), description = "Get the version of Kyuubi server.") @GET @Path("version") diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/trino/api/v1/StatementResource.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/trino/api/v1/StatementResource.scala index 3d149b5f346..124b8468857 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/trino/api/v1/StatementResource.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/trino/api/v1/StatementResource.scala @@ -17,28 +17,42 @@ package org.apache.kyuubi.server.trino.api.v1 +import java.util +import java.util.UUID import javax.ws.rs._ -import javax.ws.rs.core.{Context, HttpHeaders, MediaType} +import javax.ws.rs.core.{Context, HttpHeaders, MediaType, Response, UriInfo} +import javax.ws.rs.core.MediaType.TEXT_PLAIN_TYPE +import javax.ws.rs.core.Response.Status.{BAD_REQUEST, NOT_FOUND} +import scala.util.Try +import scala.util.control.NonFatal + +import io.airlift.units.Duration import io.swagger.v3.oas.annotations.media.{Content, Schema} import io.swagger.v3.oas.annotations.responses.ApiResponse import io.swagger.v3.oas.annotations.tags.Tag import io.trino.client.QueryResults import org.apache.kyuubi.Logging -import org.apache.kyuubi.server.trino.api.{ApiRequestContext, KyuubiTrinoOperationTranslator} +import org.apache.kyuubi.jdbc.hive.Utils +import org.apache.kyuubi.operation.OperationHandle +import org.apache.kyuubi.server.trino.api.{ApiRequestContext, KyuubiTrinoOperationTranslator, Query, QueryId, Slug, TrinoContext} +import org.apache.kyuubi.server.trino.api.Slug.Context.{EXECUTING_QUERY, QUEUED_QUERY} import org.apache.kyuubi.server.trino.api.v1.dto.Ok +import org.apache.kyuubi.service.BackendService +import org.apache.kyuubi.sql.parser.trino.KyuubiTrinoFeParser +import org.apache.kyuubi.sql.plan.trino.{Deallocate, ExecuteForPreparing, Prepare} @Tag(name = "Statement") @Produces(Array(MediaType.APPLICATION_JSON)) private[v1] class StatementResource extends ApiRequestContext with Logging { lazy val translator = new KyuubiTrinoOperationTranslator(fe.be) + lazy val parser = new KyuubiTrinoFeParser() @ApiResponse( responseCode = "200", - content = Array(new Content( - mediaType = MediaType.APPLICATION_JSON)), + content = Array(new Content(mediaType = MediaType.APPLICATION_JSON)), description = "test") @GET @Path("test") @@ -51,75 +65,204 @@ private[v1] class StatementResource extends ApiRequestContext with Logging { schema = new Schema(implementation = classOf[QueryResults]))), description = "Create a query") - @GET + @POST @Path("/") @Consumes(Array(MediaType.TEXT_PLAIN)) - def query(statement: String, @Context headers: HttpHeaders): QueryResults = { - throw new UnsupportedOperationException + def query( + statement: String, + @Context headers: HttpHeaders, + @Context uriInfo: UriInfo): Response = { + if (statement == null || statement.isEmpty) { + throw badRequest(BAD_REQUEST, "SQL statement is empty") + } + + val remoteAddr = Option(httpRequest.getRemoteAddr) + val trinoContext = TrinoContext(headers, remoteAddr) + + try { + parser.parsePlan(statement) match { + case Prepare(statementId, _) => + val query = Query( + statementId, + statement.split(s"$statementId FROM")(1), + trinoContext, + fe.be) + val qr = query.getPrepareQueryResults(query.getLastToken, uriInfo) + TrinoContext.buildTrinoResponse(qr, query.context) + case ExecuteForPreparing(statementId, parameters) => + val parametersMap = new util.HashMap[Integer, String]() + for (i <- 0 until parameters.size) { + parametersMap.put(i + 1, parameters(i)) + } + trinoContext.preparedStatement.get(statementId).map { originSql => + val realSql = Utils.updateSql(originSql, parametersMap) + val query = Query(realSql, trinoContext, translator, fe.be) + val qr = query.getQueryResults(query.getLastToken, uriInfo) + TrinoContext.buildTrinoResponse(qr, query.context) + }.get + case Deallocate(statementId) => + info(s"DEALLOCATE PREPARE ${statementId}") + val query = Query( + QueryId(new OperationHandle(UUID.randomUUID())), + trinoContext, + fe.be) + val qr = query.getPrepareQueryResults(query.getLastToken, uriInfo) + TrinoContext.buildTrinoResponse(qr, query.context) + case _ => + val query = Query(statement, trinoContext, translator, fe.be) + val qr = query.getQueryResults(query.getLastToken, uriInfo) + TrinoContext.buildTrinoResponse(qr, query.context) + } + } catch { + case e: Exception => + val errorMsg = + s"Error submitting sql" + error(errorMsg, e) + throw badRequest(BAD_REQUEST, errorMsg + "\n" + e.getMessage) + } } @ApiResponse( responseCode = "200", - content = Array(new Content( - mediaType = MediaType.APPLICATION_JSON)), + content = Array(new Content(mediaType = MediaType.APPLICATION_JSON)), description = "Get queued statement status") @GET @Path("/queued/{queryId}/{slug}/{token}") def getQueuedStatementStatus( - @Context headers: HttpHeaders, @PathParam("queryId") queryId: String, @PathParam("slug") slug: String, - @PathParam("token") token: Long): QueryResults = { - throw new UnsupportedOperationException + @PathParam("token") token: Long, + @QueryParam("maxWait") maxWait: Duration, + @Context headers: HttpHeaders, + @Context uriInfo: UriInfo): Response = { + + val remoteAddr = Option(httpRequest.getRemoteAddr) + val trinoContext = TrinoContext(headers, remoteAddr) + val waitTime = if (maxWait == null) 0 else maxWait.toMillis + getQuery(fe.be, trinoContext, QueryId(queryId), slug, token, QUEUED_QUERY) + .flatMap(query => + Try(TrinoContext.buildTrinoResponse( + query.getQueryResults( + token, + uriInfo, + waitTime), + query.context))) + .recover { + case NonFatal(e) => + val errorMsg = + s"Error executing for query id $queryId" + error(errorMsg, e) + throw badRequest(NOT_FOUND, "Query not found") + }.get } @ApiResponse( responseCode = "200", - content = Array(new Content( - mediaType = MediaType.APPLICATION_JSON)), + content = Array(new Content(mediaType = MediaType.APPLICATION_JSON)), description = "Get executing statement status") @GET @Path("/executing/{queryId}/{slug}/{token}") def getExecutingStatementStatus( - @Context headers: HttpHeaders, @PathParam("queryId") queryId: String, @PathParam("slug") slug: String, - @PathParam("token") token: Long): QueryResults = { - throw new UnsupportedOperationException + @PathParam("token") token: Long, + @QueryParam("maxWait") maxWait: Duration, + @Context headers: HttpHeaders, + @Context uriInfo: UriInfo): Response = { + + val remoteAddr = Option(httpRequest.getRemoteAddr) + val trinoContext = TrinoContext(headers, remoteAddr) + val waitTime = if (maxWait == null) 0 else maxWait.toMillis + getQuery(fe.be, trinoContext, QueryId(queryId), slug, token, EXECUTING_QUERY) + .flatMap(query => + Try(TrinoContext.buildTrinoResponse( + query.getQueryResults(token, uriInfo, waitTime), + query.context))) + .recover { + case NonFatal(e) => + val errorMsg = + s"Error executing for query id $queryId" + error(errorMsg, e) + throw badRequest(NOT_FOUND, "Query not found") + }.get } @ApiResponse( responseCode = "200", - content = Array(new Content( - mediaType = MediaType.APPLICATION_JSON)), + content = Array(new Content(mediaType = MediaType.APPLICATION_JSON)), description = "Cancel queued statement") @DELETE @Path("/queued/{queryId}/{slug}/{token}") def cancelQueuedStatement( - @Context headers: HttpHeaders, @PathParam("queryId") queryId: String, @PathParam("slug") slug: String, - @PathParam("token") token: Long): QueryResults = { - throw new UnsupportedOperationException + @PathParam("token") token: Long, + @Context headers: HttpHeaders): Response = { + + val remoteAddr = Option(httpRequest.getRemoteAddr) + val trinoContext = TrinoContext(headers, remoteAddr) + getQuery(fe.be, trinoContext, QueryId(queryId), slug, token, QUEUED_QUERY) + .flatMap(query => Try(query.cancel)) + .recover { + case NonFatal(e) => + val errorMsg = + s"Error executing for query id $queryId" + error(errorMsg, e) + throw badRequest(NOT_FOUND, "Query not found") + }.get + Response.noContent.build } @ApiResponse( responseCode = "200", - content = Array(new Content( - mediaType = MediaType.APPLICATION_JSON)), + content = Array(new Content(mediaType = MediaType.APPLICATION_JSON)), description = "Cancel executing statement") @DELETE @Path("/executing/{queryId}/{slug}/{token}") def cancelExecutingStatementStatus( - @Context headers: HttpHeaders, @PathParam("queryId") queryId: String, @PathParam("slug") slug: String, - @PathParam("token") token: Long): QueryResults = { - throw new UnsupportedOperationException + @PathParam("token") token: Long, + @Context headers: HttpHeaders): Response = { + + val remoteAddr = Option(httpRequest.getRemoteAddr) + val trinoContext = TrinoContext(headers, remoteAddr) + getQuery(fe.be, trinoContext, QueryId(queryId), slug, token, EXECUTING_QUERY) + .flatMap(query => Try(query.cancel)) + .recover { + case NonFatal(e) => + val errorMsg = + s"Error executing for query id $queryId" + error(errorMsg, e) + throw badRequest(NOT_FOUND, "Query not found") + }.get + + Response.noContent.build } + private def getQuery( + be: BackendService, + context: TrinoContext, + queryId: QueryId, + slug: String, + token: Long, + slugContext: Slug.Context.Context): Try[Query] = { + Try(be.sessionManager.operationManager.getOperation(queryId.operationHandle)).map { op => + val sessionWithId = context.session ++ + Map(Query.KYUUBI_SESSION_ID -> op.getSession.handle.identifier.toString) + Query(queryId, context.copy(session = sessionWithId), be) + }.filter(_.getSlug.isValid(slugContext, slug, token)) + } + + private def badRequest(status: Response.Status, message: String) = + new WebApplicationException( + Response.status(status) + .`type`(TEXT_PLAIN_TYPE) + .entity(message) + .build) + } diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/trino/api/v1/dto/Ok.java b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/trino/api/v1/dto/Ok.java index 50d04609fb9..982baa2ef38 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/trino/api/v1/dto/Ok.java +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/trino/api/v1/dto/Ok.java @@ -20,6 +20,9 @@ import java.util.Objects; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; import org.apache.commons.lang3.builder.ReflectionToStringBuilder; import org.apache.commons.lang3.builder.ToStringStyle; @@ -28,10 +31,16 @@ public class Ok { public Ok() {} - public Ok(String content) { + /** + * Follow Trino way that explicitly specifies the json property since we disable the jackson + * auto detect feature. See {@link org.apache.kyuubi.server.trino.api.TrinoScalaObjectMapper} + */ + @JsonCreator + public Ok(@JsonProperty("content") String content) { this.content = content; } + @JsonProperty public String getContent() { return content; } diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/session/KyuubiBatchSessionImpl.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/session/KyuubiBatchSessionImpl.scala index 967397c9575..228890a1e4e 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/session/KyuubiBatchSessionImpl.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/session/KyuubiBatchSessionImpl.scala @@ -17,20 +17,16 @@ package org.apache.kyuubi.session -import java.util.UUID - import scala.collection.JavaConverters._ -import com.codahale.metrics.MetricRegistry import org.apache.hive.service.rpc.thrift.TProtocolVersion import org.apache.kyuubi.client.api.v1.dto.BatchRequest -import org.apache.kyuubi.config.KyuubiConf +import org.apache.kyuubi.client.util.BatchUtils._ +import org.apache.kyuubi.config.{KyuubiConf, KyuubiReservedKeys} import org.apache.kyuubi.engine.KyuubiApplicationManager import org.apache.kyuubi.engine.spark.SparkProcessBuilder import org.apache.kyuubi.events.{EventBus, KyuubiSessionEvent} -import org.apache.kyuubi.metrics.MetricsConstants.{CONN_OPEN, CONN_TOTAL} -import org.apache.kyuubi.metrics.MetricsSystem import org.apache.kyuubi.operation.OperationState import org.apache.kyuubi.server.metadata.api.Metadata import org.apache.kyuubi.session.SessionType.SessionType @@ -53,9 +49,10 @@ class KyuubiBatchSessionImpl( sessionManager) { override val sessionType: SessionType = SessionType.BATCH - override val handle: SessionHandle = recoveryMetadata.map { metadata => - SessionHandle(UUID.fromString(metadata.identifier)) - }.getOrElse(SessionHandle()) + override val handle: SessionHandle = { + val batchId = recoveryMetadata.map(_.identifier).getOrElse(conf(KYUUBI_BATCH_ID_KEY)) + SessionHandle.fromUUID(batchId) + } override def createTime: Long = recoveryMetadata.map(_.createTime).getOrElse(super.createTime) @@ -80,6 +77,10 @@ class KyuubiBatchSessionImpl( override lazy val name: Option[String] = Option(batchRequest.getName).orElse( normalizedConf.get(KyuubiConf.SESSION_NAME.key)) + // whether the resource file is from uploading + private[kyuubi] val isResourceUploaded: Boolean = batchRequest.getConf + .getOrDefault(KyuubiReservedKeys.KYUUBI_BATCH_RESOURCE_UPLOADED_KEY, "false").toBoolean + private[kyuubi] lazy val batchJobSubmissionOp = sessionManager.operationManager .newBatchJobSubmissionOperation( this, @@ -104,7 +105,7 @@ class KyuubiBatchSessionImpl( } private val sessionEvent = KyuubiSessionEvent(this) - recoveryMetadata.map(metadata => sessionEvent.engineId = metadata.engineId) + recoveryMetadata.foreach(metadata => sessionEvent.engineId = metadata.engineId) EventBus.post(sessionEvent) override def getSessionEvent: Option[KyuubiSessionEvent] = { @@ -116,7 +117,8 @@ class KyuubiBatchSessionImpl( batchRequest.getBatchType, normalizedConf, sessionManager.getConf) - if (batchRequest.getResource != SparkProcessBuilder.INTERNAL_RESOURCE) { + if (batchRequest.getResource != SparkProcessBuilder.INTERNAL_RESOURCE + && !isResourceUploaded) { KyuubiApplicationManager.checkApplicationAccessPath( batchRequest.getResource, sessionManager.getConf) @@ -124,10 +126,7 @@ class KyuubiBatchSessionImpl( } override def open(): Unit = handleSessionException { - MetricsSystem.tracing { ms => - ms.incCount(CONN_TOTAL) - ms.incCount(MetricRegistry.name(CONN_OPEN, user)) - } + traceMetricsOnOpen() if (recoveryMetadata.isEmpty) { val metaData = Metadata( @@ -147,6 +146,7 @@ class KyuubiBatchSessionImpl( engineType = batchRequest.getBatchType, clusterManager = batchJobSubmissionOp.builder.clusterManager()) + // there is a chance that operation failed w/ duplicated key error sessionManager.insertMetadata(metaData) } @@ -172,6 +172,6 @@ class KyuubiBatchSessionImpl( waitMetadataRequestsRetryCompletion() sessionEvent.endTime = System.currentTimeMillis() EventBus.post(sessionEvent) - MetricsSystem.tracing(_.decCount(MetricRegistry.name(CONN_OPEN, user))) + traceMetricsOnClose() } } diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/session/KyuubiSession.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/session/KyuubiSession.scala index e2c69282092..7316e367b3c 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/session/KyuubiSession.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/session/KyuubiSession.scala @@ -16,10 +16,13 @@ */ package org.apache.kyuubi.session +import com.codahale.metrics.MetricRegistry import org.apache.hive.service.rpc.thrift.TProtocolVersion import org.apache.kyuubi.config.KyuubiReservedKeys.{KYUUBI_SESSION_CONNECTION_URL_KEY, KYUUBI_SESSION_REAL_USER_KEY} -import org.apache.kyuubi.events.KyuubiSessionEvent +import org.apache.kyuubi.events.{EventBus, KyuubiSessionEvent} +import org.apache.kyuubi.metrics.MetricsConstants.{CONN_OPEN, CONN_TOTAL} +import org.apache.kyuubi.metrics.MetricsSystem import org.apache.kyuubi.session.SessionType.SessionType abstract class KyuubiSession( @@ -46,8 +49,25 @@ abstract class KyuubiSession( f } catch { case t: Throwable => - getSessionEvent.foreach(_.exception = Some(t)) + getSessionEvent.foreach { sessionEvent => + sessionEvent.exception = Some(t) + EventBus.post(sessionEvent) + } throw t } } + + protected def traceMetricsOnOpen(): Unit = MetricsSystem.tracing { ms => + ms.incCount(CONN_TOTAL) + ms.incCount(MetricRegistry.name(CONN_TOTAL, sessionType.toString)) + ms.incCount(MetricRegistry.name(CONN_OPEN, user)) + ms.incCount(MetricRegistry.name(CONN_OPEN, user, sessionType.toString)) + ms.incCount(MetricRegistry.name(CONN_OPEN, sessionType.toString)) + } + + protected def traceMetricsOnClose(): Unit = MetricsSystem.tracing { ms => + ms.decCount(MetricRegistry.name(CONN_OPEN, user)) + ms.decCount(MetricRegistry.name(CONN_OPEN, user, sessionType.toString)) + ms.decCount(MetricRegistry.name(CONN_OPEN, sessionType.toString)) + } } diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/session/KyuubiSessionImpl.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/session/KyuubiSessionImpl.scala index b669390969e..80df5c44dd0 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/session/KyuubiSessionImpl.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/session/KyuubiSessionImpl.scala @@ -21,19 +21,16 @@ import java.util.Base64 import scala.collection.JavaConverters._ -import com.codahale.metrics.MetricRegistry import org.apache.hive.service.rpc.thrift._ import org.apache.kyuubi.KyuubiSQLException import org.apache.kyuubi.client.KyuubiSyncThriftClient import org.apache.kyuubi.config.KyuubiConf import org.apache.kyuubi.config.KyuubiConf._ -import org.apache.kyuubi.config.KyuubiReservedKeys.{KYUUBI_ENGINE_CREDENTIALS_KEY, KYUUBI_SESSION_SIGN_PUBLICKEY, KYUUBI_SESSION_USER_SIGN} +import org.apache.kyuubi.config.KyuubiReservedKeys.{KYUUBI_ENGINE_CREDENTIALS_KEY, KYUUBI_SESSION_HANDLE_KEY, KYUUBI_SESSION_SIGN_PUBLICKEY, KYUUBI_SESSION_USER_SIGN} import org.apache.kyuubi.engine.{EngineRef, KyuubiApplicationManager} import org.apache.kyuubi.events.{EventBus, KyuubiSessionEvent} import org.apache.kyuubi.ha.client.DiscoveryClientProvider._ -import org.apache.kyuubi.metrics.MetricsConstants._ -import org.apache.kyuubi.metrics.MetricsSystem import org.apache.kyuubi.operation.{Operation, OperationHandle} import org.apache.kyuubi.operation.log.OperationLog import org.apache.kyuubi.service.authentication.InternalSecurityAccessor @@ -80,7 +77,7 @@ class KyuubiSessionImpl( lazy val engine: EngineRef = new EngineRef( sessionConf, user, - sessionManager.groupProvider.primaryGroup(user, optimizedConf.asJava), + sessionManager.groupProvider, handle.identifier.toString, sessionManager.applicationManager) private[kyuubi] val launchEngineOp = sessionManager.operationManager @@ -108,11 +105,10 @@ class KyuubiSessionImpl( private var _engineSessionHandle: SessionHandle = _ + private var openSessionError: Option[Throwable] = None + override def open(): Unit = handleSessionException { - MetricsSystem.tracing { ms => - ms.incCount(CONN_TOTAL) - ms.incCount(MetricRegistry.name(CONN_OPEN, user)) - } + traceMetricsOnOpen() checkSessionAccessPathURIs() @@ -122,80 +118,84 @@ class KyuubiSessionImpl( runOperation(launchEngineOp) } - private[kyuubi] def openEngineSession(extraEngineLog: Option[OperationLog] = None): Unit = { - withDiscoveryClient(sessionConf) { discoveryClient => - var openEngineSessionConf = optimizedConf - if (engineCredentials.nonEmpty) { - sessionConf.set(KYUUBI_ENGINE_CREDENTIALS_KEY, engineCredentials) - openEngineSessionConf = - optimizedConf ++ Map(KYUUBI_ENGINE_CREDENTIALS_KEY -> engineCredentials) - } + private[kyuubi] def openEngineSession(extraEngineLog: Option[OperationLog] = None): Unit = + handleSessionException { + withDiscoveryClient(sessionConf) { discoveryClient => + var openEngineSessionConf = + optimizedConf ++ Map(KYUUBI_SESSION_HANDLE_KEY -> handle.identifier.toString) + if (engineCredentials.nonEmpty) { + sessionConf.set(KYUUBI_ENGINE_CREDENTIALS_KEY, engineCredentials) + openEngineSessionConf = + openEngineSessionConf ++ Map(KYUUBI_ENGINE_CREDENTIALS_KEY -> engineCredentials) + } - if (sessionConf.get(SESSION_USER_SIGN_ENABLED)) { - openEngineSessionConf = openEngineSessionConf + - (SESSION_USER_SIGN_ENABLED.key -> - sessionConf.get(SESSION_USER_SIGN_ENABLED).toString) + - (KYUUBI_SESSION_SIGN_PUBLICKEY -> - Base64.getEncoder.encodeToString( - sessionManager.signingPublicKey.getEncoded)) + - (KYUUBI_SESSION_USER_SIGN -> sessionUserSignBase64) - } + if (sessionConf.get(SESSION_USER_SIGN_ENABLED)) { + openEngineSessionConf = openEngineSessionConf + + (SESSION_USER_SIGN_ENABLED.key -> + sessionConf.get(SESSION_USER_SIGN_ENABLED).toString) + + (KYUUBI_SESSION_SIGN_PUBLICKEY -> + Base64.getEncoder.encodeToString( + sessionManager.signingPublicKey.getEncoded)) + + (KYUUBI_SESSION_USER_SIGN -> sessionUserSignBase64) + } - val maxAttempts = sessionManager.getConf.get(ENGINE_OPEN_MAX_ATTEMPTS) - val retryWait = sessionManager.getConf.get(ENGINE_OPEN_RETRY_WAIT) - var attempt = 0 - var shouldRetry = true - while (attempt <= maxAttempts && shouldRetry) { - val (host, port) = engine.getOrCreate(discoveryClient, extraEngineLog) - try { - val passwd = - if (sessionManager.getConf.get(ENGINE_SECURITY_ENABLED)) { - InternalSecurityAccessor.get().issueToken() - } else { - Option(password).filter(_.nonEmpty).getOrElse("anonymous") - } - _client = KyuubiSyncThriftClient.createClient(user, passwd, host, port, sessionConf) - _engineSessionHandle = _client.openSession(protocol, user, passwd, openEngineSessionConf) - logSessionInfo(s"Connected to engine [$host:$port]/[${client.engineId.getOrElse("")}]" + - s" with ${_engineSessionHandle}]") - shouldRetry = false - } catch { - case e: org.apache.thrift.transport.TTransportException - if attempt < maxAttempts && e.getCause.isInstanceOf[java.net.ConnectException] && - e.getCause.getMessage.contains("Connection refused (Connection refused)") => - warn( - s"Failed to open [${engine.defaultEngineName} $host:$port] after" + - s" $attempt/$maxAttempts times, retrying", - e.getCause) - Thread.sleep(retryWait) - shouldRetry = true - case e: Throwable => - error( - s"Opening engine [${engine.defaultEngineName} $host:$port]" + - s" for $user session failed", - e) - throw e - } finally { - attempt += 1 - if (shouldRetry && _client != null) { - try { - _client.closeSession() - } catch { - case e: Throwable => - warn( - "Error on closing broken client of engine " + - s"[${engine.defaultEngineName} $host:$port]", - e) + val maxAttempts = sessionManager.getConf.get(ENGINE_OPEN_MAX_ATTEMPTS) + val retryWait = sessionManager.getConf.get(ENGINE_OPEN_RETRY_WAIT) + var attempt = 0 + var shouldRetry = true + while (attempt <= maxAttempts && shouldRetry) { + val (host, port) = engine.getOrCreate(discoveryClient, extraEngineLog) + try { + val passwd = + if (sessionManager.getConf.get(ENGINE_SECURITY_ENABLED)) { + InternalSecurityAccessor.get().issueToken() + } else { + Option(password).filter(_.nonEmpty).getOrElse("anonymous") + } + _client = KyuubiSyncThriftClient.createClient(user, passwd, host, port, sessionConf) + _engineSessionHandle = + _client.openSession(protocol, user, passwd, openEngineSessionConf) + logSessionInfo(s"Connected to engine [$host:$port]/[${client.engineId.getOrElse("")}]" + + s" with ${_engineSessionHandle}]") + shouldRetry = false + } catch { + case e: org.apache.thrift.transport.TTransportException + if attempt < maxAttempts && e.getCause.isInstanceOf[java.net.ConnectException] && + e.getCause.getMessage.contains("Connection refused (Connection refused)") => + warn( + s"Failed to open [${engine.defaultEngineName} $host:$port] after" + + s" $attempt/$maxAttempts times, retrying", + e.getCause) + Thread.sleep(retryWait) + shouldRetry = true + case e: Throwable => + error( + s"Opening engine [${engine.defaultEngineName} $host:$port]" + + s" for $user session failed", + e) + openSessionError = Some(e) + throw e + } finally { + attempt += 1 + if (shouldRetry && _client != null) { + try { + _client.closeSession() + } catch { + case e: Throwable => + warn( + "Error on closing broken client of engine " + + s"[${engine.defaultEngineName} $host:$port]", + e) + } } } } + sessionEvent.openedTime = System.currentTimeMillis() + sessionEvent.remoteSessionId = _engineSessionHandle.identifier.toString + _client.engineId.foreach(e => sessionEvent.engineId = e) + EventBus.post(sessionEvent) } - sessionEvent.openedTime = System.currentTimeMillis() - sessionEvent.remoteSessionId = _engineSessionHandle.identifier.toString - _client.engineId.foreach(e => sessionEvent.engineId = e) - EventBus.post(sessionEvent) } - } override protected def runOperation(operation: Operation): OperationHandle = { if (operation != launchEngineOp) { @@ -251,10 +251,10 @@ class KyuubiSessionImpl( try { if (_client != null) _client.closeSession() } finally { - if (engine != null) engine.close() + openSessionError.foreach { _ => if (engine != null) engine.close() } sessionEvent.endTime = System.currentTimeMillis() EventBus.post(sessionEvent) - MetricsSystem.tracing(_.decCount(MetricRegistry.name(CONN_OPEN, user))) + traceMetricsOnClose() } } diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/session/KyuubiSessionManager.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/session/KyuubiSessionManager.scala index f4b12f3861f..73248cd5632 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/session/KyuubiSessionManager.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/session/KyuubiSessionManager.scala @@ -107,6 +107,7 @@ class KyuubiSessionManager private (name: String) extends SessionManager(name) { MetricsSystem.tracing { ms => ms.incCount(CONN_FAIL) ms.incCount(MetricRegistry.name(CONN_FAIL, user)) + ms.incCount(MetricRegistry.name(CONN_FAIL, SessionType.INTERACTIVE.toString)) } throw KyuubiSQLException( s"Error opening session for $username client ip $ipAddress, due to ${e.getMessage}", @@ -168,6 +169,7 @@ class KyuubiSessionManager private (name: String) extends SessionManager(name) { MetricsSystem.tracing { ms => ms.incCount(CONN_FAIL) ms.incCount(MetricRegistry.name(CONN_FAIL, user)) + ms.incCount(MetricRegistry.name(CONN_FAIL, SessionType.BATCH.toString)) } throw KyuubiSQLException( s"Error opening batch session[$handle] for $user client ip $ipAddress," + @@ -237,6 +239,7 @@ class KyuubiSessionManager private (name: String) extends SessionManager(name) { ms.registerGauge(CONN_OPEN, getOpenSessionCount, 0) ms.registerGauge(EXEC_POOL_ALIVE, getExecPoolSize, 0) ms.registerGauge(EXEC_POOL_ACTIVE, getActiveCount, 0) + ms.registerGauge(EXEC_POOL_WORK_QUEUE_SIZE, getWorkQueueSize, 0) } super.start() } @@ -297,6 +300,16 @@ class KyuubiSessionManager private (name: String) extends SessionManager(name) { userUnlimitedList) } + private[kyuubi] def getUnlimitedUsers(): Set[String] = { + limiter.orElse(batchLimiter).map(SessionLimiter.getUnlimitedUsers).getOrElse(Set.empty) + } + + private[kyuubi] def refreshUnlimitedUsers(conf: KyuubiConf): Unit = { + val unlimitedUsers = conf.get(SERVER_LIMIT_CONNECTIONS_USER_UNLIMITED_LIST).toSet + limiter.foreach(SessionLimiter.resetUnlimitedUsers(_, unlimitedUsers)) + batchLimiter.foreach(SessionLimiter.resetUnlimitedUsers(_, unlimitedUsers)) + } + private def applySessionLimiter( userLimit: Int, ipAddressLimit: Int, diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/session/SessionLimiter.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/session/SessionLimiter.scala index b7acbac3d8c..96ca36df176 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/session/SessionLimiter.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/session/SessionLimiter.scala @@ -105,7 +105,7 @@ class SessionLimiterWithUnlimitedUsersImpl( userLimit: Int, ipAddressLimit: Int, userIpAddressLimit: Int, - unlimitedUsers: Set[String]) + var unlimitedUsers: Set[String]) extends SessionLimiterImpl(userLimit, ipAddressLimit, userIpAddressLimit) { override def increment(userIpAddress: UserIpAddress): Unit = { if (!unlimitedUsers.contains(userIpAddress.user)) { @@ -118,6 +118,10 @@ class SessionLimiterWithUnlimitedUsersImpl( super.decrement(userIpAddress) } } + + private[kyuubi] def setUnlimitedUsers(unlimitedUsers: Set[String]): Unit = { + this.unlimitedUsers = unlimitedUsers + } } object SessionLimiter { @@ -126,12 +130,22 @@ object SessionLimiter { userLimit: Int, ipAddressLimit: Int, userIpAddressLimit: Int, - userWhiteList: Set[String] = Set.empty): SessionLimiter = { + unlimitedUsers: Set[String] = Set.empty): SessionLimiter = { new SessionLimiterWithUnlimitedUsersImpl( userLimit, ipAddressLimit, userIpAddressLimit, - userWhiteList) + unlimitedUsers) } + def resetUnlimitedUsers(limiter: SessionLimiter, unlimitedUsers: Set[String]): Unit = + limiter match { + case l: SessionLimiterWithUnlimitedUsersImpl => l.setUnlimitedUsers(unlimitedUsers) + case _ => + } + + def getUnlimitedUsers(limiter: SessionLimiter): Set[String] = limiter match { + case l: SessionLimiterWithUnlimitedUsersImpl => l.unlimitedUsers + case _ => Set.empty + } } diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/sql/parser/trino/KyuubiTrinoFeAstBuilder.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/sql/parser/trino/KyuubiTrinoFeAstBuilder.scala index c5ae9719947..8d1e38519d9 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/sql/parser/trino/KyuubiTrinoFeAstBuilder.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/sql/parser/trino/KyuubiTrinoFeAstBuilder.scala @@ -25,7 +25,7 @@ import org.apache.kyuubi.sql.KyuubiTrinoFeBaseParser._ import org.apache.kyuubi.sql.KyuubiTrinoFeBaseParserBaseVisitor import org.apache.kyuubi.sql.parser.KyuubiParser.unescapeSQLString import org.apache.kyuubi.sql.plan.{KyuubiTreeNode, PassThroughNode} -import org.apache.kyuubi.sql.plan.trino.{GetCatalogs, GetColumns, GetSchemas, GetTables, GetTableTypes, GetTypeInfo} +import org.apache.kyuubi.sql.plan.trino.{Deallocate, ExecuteForPreparing, GetCatalogs, GetColumns, GetPrimaryKeys, GetSchemas, GetTables, GetTableTypes, GetTypeInfo, Prepare} class KyuubiTrinoFeAstBuilder extends KyuubiTrinoFeBaseParserBaseVisitor[AnyRef] { @@ -92,6 +92,10 @@ class KyuubiTrinoFeAstBuilder extends KyuubiTrinoFeBaseParserBaseVisitor[AnyRef] GetColumns(catalog, schemaPattern, tableNamePattern, colNamePattern) } + override def visitGetPrimaryKeys(ctx: GetPrimaryKeysContext): KyuubiTreeNode = { + GetPrimaryKeys() + } + override def visitNullCatalog(ctx: NullCatalogContext): AnyRef = { null } @@ -119,4 +123,21 @@ class KyuubiTrinoFeAstBuilder extends KyuubiTrinoFeBaseParserBaseVisitor[AnyRef] override def visitTypesFilter(ctx: TypesFilterContext): List[String] = { ctx.stringLit().asScala.map(v => unescapeSQLString(v.getText)).toList } + + override def visitExecute(ctx: ExecuteContext): KyuubiTreeNode = { + val parameters = Option(ctx.parameterList()) match { + case Some(para) => + para.anyStr().asScala.toList.map(p => p.getText.substring(1, p.getText.length - 1)) + case None => List[String]() + } + ExecuteForPreparing(ctx.IDENTIFIER().getText, parameters) + } + + override def visitPrepare(ctx: PrepareContext): KyuubiTreeNode = { + Prepare(ctx.IDENTIFIER().getText, ctx.statement().getText) + } + + override def visitDeallocate(ctx: DeallocateContext): KyuubiTreeNode = { + Deallocate(ctx.IDENTIFIER().getText) + } } diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/sql/parser/trino/KyuubiTrinoFeParser.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/sql/parser/trino/KyuubiTrinoFeParser.scala index 987288b0f82..5dececf20f0 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/sql/parser/trino/KyuubiTrinoFeParser.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/sql/parser/trino/KyuubiTrinoFeParser.scala @@ -56,4 +56,5 @@ class KyuubiTrinoFeParser extends KyuubiParserBase[KyuubiTrinoFeBaseParser] { } override def parseTree(parser: KyuubiTrinoFeBaseParser): ParseTree = parser.singleStatement() + } diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/sql/plan/trino/TrinoFeOperations.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/sql/plan/trino/TrinoFeOperations.scala index 85e6f168bcb..8d02a74c676 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/sql/plan/trino/TrinoFeOperations.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/sql/plan/trino/TrinoFeOperations.scala @@ -55,3 +55,22 @@ case class GetColumns( colNamePattern: String) extends KyuubiTreeNode { override def name(): String = "Get Columns" } + +case class GetPrimaryKeys() extends KyuubiTreeNode { + override def name(): String = "Get Primary Keys" +} + +case class ExecuteForPreparing(statementId: String, parameters: List[String]) + extends KyuubiTreeNode { + override def name(): String = "Execute For Preparing" +} + +case class Prepare(statementId: String, sql: String) + extends KyuubiTreeNode { + override def name(): String = "Prepare Sql" +} + +case class Deallocate(statementId: String) + extends KyuubiTreeNode { + override def name(): String = "Deallocate Prepare" +} diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/util/KubernetesUtils.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/util/KubernetesUtils.scala index 921aa04ae3c..0c934b51d06 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/util/KubernetesUtils.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/util/KubernetesUtils.scala @@ -22,7 +22,7 @@ import java.io.File import com.fasterxml.jackson.databind.ObjectMapper import com.google.common.base.Charsets import com.google.common.io.Files -import io.fabric8.kubernetes.client.{Config, ConfigBuilder, DefaultKubernetesClient, KubernetesClient} +import io.fabric8.kubernetes.client.{Config, ConfigBuilder, KubernetesClient, KubernetesClientBuilder} import io.fabric8.kubernetes.client.Config.autoConfigure import io.fabric8.kubernetes.client.okhttp.OkHttpClientFactory import okhttp3.{Dispatcher, OkHttpClient} @@ -93,7 +93,10 @@ object KubernetesUtils extends Logging { debug("Kubernetes client config: " + new ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(config)) - Some(new DefaultKubernetesClient(factoryWithCustomDispatcher.createHttpClient(config), config)) + Some(new KubernetesClientBuilder() + .withHttpClientFactory(factoryWithCustomDispatcher) + .withConfig(config) + .build()) } implicit private class OptionConfigurableConfigBuilder(val configBuilder: ConfigBuilder) diff --git a/kyuubi-server/src/test/scala/org/apache/kyuubi/RestClientTestHelper.scala b/kyuubi-server/src/test/scala/org/apache/kyuubi/RestClientTestHelper.scala index 5b362738196..8344cdef01d 100644 --- a/kyuubi-server/src/test/scala/org/apache/kyuubi/RestClientTestHelper.scala +++ b/kyuubi-server/src/test/scala/org/apache/kyuubi/RestClientTestHelper.scala @@ -54,7 +54,7 @@ trait RestClientTestHelper extends RestFrontendTestHelper with KerberizedTestHel .set(KyuubiConf.SERVER_SPNEGO_KEYTAB, testKeytab) .set(KyuubiConf.SERVER_SPNEGO_PRINCIPAL, testSpnegoPrincipal) .set(KyuubiConf.AUTHENTICATION_LDAP_URL, ldapUrl) - .set(KyuubiConf.AUTHENTICATION_LDAP_BASEDN, ldapBaseDn) + .set(KyuubiConf.AUTHENTICATION_LDAP_BASE_DN, ldapBaseDn.head) .set( KyuubiConf.AUTHENTICATION_CUSTOM_CLASS, classOf[UserDefineAuthenticationProviderImpl].getCanonicalName) diff --git a/kyuubi-server/src/test/scala/org/apache/kyuubi/RestFrontendTestHelper.scala b/kyuubi-server/src/test/scala/org/apache/kyuubi/RestFrontendTestHelper.scala index c081185d8ac..fafdcf4a7b1 100644 --- a/kyuubi-server/src/test/scala/org/apache/kyuubi/RestFrontendTestHelper.scala +++ b/kyuubi-server/src/test/scala/org/apache/kyuubi/RestFrontendTestHelper.scala @@ -22,6 +22,7 @@ import javax.ws.rs.client.WebTarget import javax.ws.rs.core.{Application, Response, UriBuilder} import org.glassfish.jersey.client.ClientConfig +import org.glassfish.jersey.media.multipart.MultiPartFeature import org.glassfish.jersey.server.ResourceConfig import org.glassfish.jersey.test.JerseyTest import org.glassfish.jersey.test.jetty.JettyTestContainerFactory @@ -36,12 +37,14 @@ import org.apache.kyuubi.service.AbstractFrontendService object RestFrontendTestHelper { - private class RestApiBaseSuite extends JerseyTest { + class RestApiBaseSuite extends JerseyTest { override def configure: Application = new ResourceConfig(getClass) + .register(classOf[MultiPartFeature]) override def configureClient(config: ClientConfig): Unit = { config.register(classOf[KyuubiScalaObjectMapper]) + .register(classOf[MultiPartFeature]) } override def getTestContainerFactory: TestContainerFactory = new JettyTestContainerFactory @@ -55,7 +58,7 @@ trait RestFrontendTestHelper extends WithKyuubiServer { override protected val frontendProtocols: Seq[FrontendProtocol] = FrontendProtocols.REST :: Nil - private val restApiBaseSuite = new RestApiBaseSuite + protected val restApiBaseSuite: JerseyTest = new RestApiBaseSuite override def beforeAll(): Unit = { super.beforeAll() diff --git a/kyuubi-server/src/test/scala/org/apache/kyuubi/TrinoClientTestHelper.scala b/kyuubi-server/src/test/scala/org/apache/kyuubi/TrinoClientTestHelper.scala deleted file mode 100644 index c0b3949f4ee..00000000000 --- a/kyuubi-server/src/test/scala/org/apache/kyuubi/TrinoClientTestHelper.scala +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.kyuubi - -import java.net.URI -import java.time.ZoneId -import java.util.{Locale, Optional} -import java.util.concurrent.TimeUnit - -import scala.collection.JavaConverters._ - -import io.airlift.units.Duration -import io.trino.client.{ClientSelectedRole, ClientSession, StatementClient, StatementClientFactory} -import okhttp3.OkHttpClient - -trait TrinoClientTestHelper extends RestFrontendTestHelper { - - override def afterAll(): Unit = { - super.afterAll() - } - - private val httpClient = new OkHttpClient.Builder().build() - - protected val clientSession = createClientSession(baseUri: URI) - - def getTrinoStatementClient(sql: String): StatementClient = { - StatementClientFactory.newStatementClient(httpClient, clientSession, sql) - } - - def createClientSession(connectUrl: URI): ClientSession = { - new ClientSession( - connectUrl, - "kyuubi_test", - Optional.of("test_user"), - "kyuubi", - Optional.of("test_token_tracing"), - Set[String]().asJava, - "test_client_info", - "test_catalog", - "test_schema", - "test_path", - ZoneId.systemDefault(), - Locale.getDefault, - Map[String, String]( - "test_resource_key0" -> "test_resource_value0", - "test_resource_key1" -> "test_resource_value1").asJava, - Map[String, String]( - "test_property_key0" -> "test_property_value0", - "test_property_key1" -> "test_propert_value1").asJava, - Map[String, String]( - "test_statement_key0" -> "select 1", - "test_statement_key1" -> "select 2").asJava, - Map[String, ClientSelectedRole]( - "test_role_key0" -> ClientSelectedRole.valueOf("ROLE"), - "test_role_key2" -> ClientSelectedRole.valueOf("ALL")).asJava, - Map[String, String]( - "test_credentials_key0" -> "test_credentials_value0", - "test_credentials_key1" -> "test_credentials_value1").asJava, - "test_transaction_id", - new Duration(2, TimeUnit.MINUTES), - true) - - } - -} diff --git a/kyuubi-server/src/test/scala/org/apache/kyuubi/TrinoRestFrontendTestHelper.scala b/kyuubi-server/src/test/scala/org/apache/kyuubi/TrinoRestFrontendTestHelper.scala new file mode 100644 index 00000000000..1ff00e64fa2 --- /dev/null +++ b/kyuubi-server/src/test/scala/org/apache/kyuubi/TrinoRestFrontendTestHelper.scala @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi + +import org.glassfish.jersey.client.ClientConfig +import org.glassfish.jersey.test.JerseyTest + +import org.apache.kyuubi.config.KyuubiConf.FrontendProtocols +import org.apache.kyuubi.config.KyuubiConf.FrontendProtocols.FrontendProtocol +import org.apache.kyuubi.server.trino.api.TrinoScalaObjectMapper + +trait TrinoRestFrontendTestHelper extends RestFrontendTestHelper { + + private class TrinoRestBaseSuite extends RestFrontendTestHelper.RestApiBaseSuite { + override def configureClient(config: ClientConfig): Unit = { + config.register(classOf[TrinoScalaObjectMapper]) + } + } + + override protected val frontendProtocols: Seq[FrontendProtocol] = + FrontendProtocols.TRINO :: Nil + + override protected val restApiBaseSuite: JerseyTest = new TrinoRestBaseSuite + +} diff --git a/kyuubi-server/src/test/scala/org/apache/kyuubi/WithKyuubiServerOnYarn.scala b/kyuubi-server/src/test/scala/org/apache/kyuubi/WithKyuubiServerOnYarn.scala index 727c5545e9d..3bc6bb1c578 100644 --- a/kyuubi-server/src/test/scala/org/apache/kyuubi/WithKyuubiServerOnYarn.scala +++ b/kyuubi-server/src/test/scala/org/apache/kyuubi/WithKyuubiServerOnYarn.scala @@ -17,14 +17,17 @@ package org.apache.kyuubi +import java.util.UUID + import scala.collection.JavaConverters._ import scala.concurrent.duration._ +import org.apache.kyuubi.client.util.BatchUtils._ import org.apache.kyuubi.config.KyuubiConf import org.apache.kyuubi.config.KyuubiConf._ import org.apache.kyuubi.config.KyuubiConf.FrontendProtocols.FrontendProtocol +import org.apache.kyuubi.engine.{ApplicationState, YarnApplicationOperation} import org.apache.kyuubi.engine.ApplicationState._ -import org.apache.kyuubi.engine.YarnApplicationOperation import org.apache.kyuubi.operation.{FetchOrientation, HiveJDBCTestHelper, OperationState} import org.apache.kyuubi.operation.OperationState.ERROR import org.apache.kyuubi.server.MiniYarnService @@ -104,7 +107,10 @@ class KyuubiOperationYarnClusterSuite extends WithKyuubiServerOnYarn with HiveJD test("open batch session") { val batchRequest = - newSparkBatchRequest(Map("spark.master" -> "local", "spark.executor.instances" -> "1")) + newSparkBatchRequest(Map( + "spark.master" -> "local", + "spark.executor.instances" -> "1", + KYUUBI_BATCH_ID_KEY -> UUID.randomUUID().toString)) val sessionHandle = sessionManager.openBatchSession( "kyuubi", @@ -117,7 +123,7 @@ class KyuubiOperationYarnClusterSuite extends WithKyuubiServerOnYarn with HiveJD val batchJobSubmissionOp = session.batchJobSubmissionOp eventually(timeout(3.minutes), interval(50.milliseconds)) { - val appInfo = batchJobSubmissionOp.currentApplicationInfo + val appInfo = batchJobSubmissionOp.getOrFetchCurrentApplicationInfo assert(appInfo.nonEmpty) assert(appInfo.exists(_.id.startsWith("application_"))) } @@ -152,7 +158,7 @@ class KyuubiOperationYarnClusterSuite extends WithKyuubiServerOnYarn with HiveJD val appUrl = rows("url") val appError = rows("error") - val appInfo2 = batchJobSubmissionOp.currentApplicationInfo.get + val appInfo2 = batchJobSubmissionOp.getOrFetchCurrentApplicationInfo.get assert(appId === appInfo2.id) assert(appName === appInfo2.name) assert(appState === appInfo2.state.toString) @@ -162,7 +168,9 @@ class KyuubiOperationYarnClusterSuite extends WithKyuubiServerOnYarn with HiveJD } test("prevent dead loop if the batch job submission process it not alive") { - val batchRequest = newSparkBatchRequest(Map("spark.submit.deployMode" -> "invalid")) + val batchRequest = newSparkBatchRequest(Map( + "spark.submit.deployMode" -> "invalid", + KYUUBI_BATCH_ID_KEY -> UUID.randomUUID().toString)) val sessionHandle = sessionManager.openBatchSession( "kyuubi", @@ -175,7 +183,9 @@ class KyuubiOperationYarnClusterSuite extends WithKyuubiServerOnYarn with HiveJD val batchJobSubmissionOp = session.batchJobSubmissionOp eventually(timeout(3.minutes), interval(50.milliseconds)) { - assert(batchJobSubmissionOp.currentApplicationInfo.isEmpty) + assert(batchJobSubmissionOp.getOrFetchCurrentApplicationInfo.exists(_.id == null)) + assert(batchJobSubmissionOp.getOrFetchCurrentApplicationInfo.exists( + _.state == ApplicationState.NOT_FOUND)) assert(batchJobSubmissionOp.getStatus.state === OperationState.ERROR) } } @@ -186,7 +196,8 @@ class KyuubiOperationYarnClusterSuite extends WithKyuubiServerOnYarn with HiveJD "spark.submit.deployMode" -> "cluster", "spark.sql.defaultCatalog=spark_catalog" -> "spark_catalog", "spark.sql.catalog.spark_catalog.type" -> "invalid_type", - "kyuubi.session.engine.initialize.timeout" -> "PT10m"))(Map.empty) { + "kyuubi.session.engine.initialize.timeout" -> "PT10M", + KYUUBI_BATCH_ID_KEY -> UUID.randomUUID().toString))(Map.empty) { val startTime = System.currentTimeMillis() val exception = intercept[Exception] { withJdbcStatement() { _ => } diff --git a/kyuubi-server/src/test/scala/org/apache/kyuubi/config/AllKyuubiConfiguration.scala b/kyuubi-server/src/test/scala/org/apache/kyuubi/config/AllKyuubiConfiguration.scala index 31ab67754f2..9fff482d449 100644 --- a/kyuubi-server/src/test/scala/org/apache/kyuubi/config/AllKyuubiConfiguration.scala +++ b/kyuubi-server/src/test/scala/org/apache/kyuubi/config/AllKyuubiConfiguration.scala @@ -17,13 +17,11 @@ package org.apache.kyuubi.config -import java.nio.charset.StandardCharsets -import java.nio.file.{Files, Path, Paths} +import java.nio.file.Paths import scala.collection.JavaConverters._ -import scala.collection.mutable.ArrayBuffer -import org.apache.kyuubi.{KyuubiFunSuite, TestUtils, Utils} +import org.apache.kyuubi.{KyuubiFunSuite, MarkdownBuilder, MarkdownUtils, Utils} import org.apache.kyuubi.ctl.CtlConf import org.apache.kyuubi.ha.HighAvailabilityConf import org.apache.kyuubi.metrics.MetricsConf @@ -37,12 +35,12 @@ import org.apache.kyuubi.zookeeper.ZookeeperConf * * To run the entire test suite: * {{{ - * build/mvn clean install -Pflink-provided,spark-provided,hive-provided -DwildcardSuites=org.apache.kyuubi.config.AllKyuubiConfiguration + * build/mvn clean test -pl kyuubi-server -am -Pflink-provided,spark-provided,hive-provided -Dtest=none -DwildcardSuites=org.apache.kyuubi.config.AllKyuubiConfiguration * }}} * * To re-generate golden files for entire suite, run: * {{{ - * KYUUBI_UPDATE=1 build/mvn clean install -Pflink-provided,spark-provided,hive-provided -DwildcardSuites=org.apache.kyuubi.config.AllKyuubiConfiguration + * KYUUBI_UPDATE=1 build/mvn clean test -pl kyuubi-server -am -Pflink-provided,spark-provided,hive-provided -Dtest=none -DwildcardSuites=org.apache.kyuubi.config.AllKyuubiConfiguration * }}} */ // scalastyle:on line.size.limit @@ -51,256 +49,197 @@ class AllKyuubiConfiguration extends KyuubiFunSuite { private val markdown = Paths.get(kyuubiHome, "docs", "deployment", "settings.md") .toAbsolutePath - def rewriteToConf(path: Path, buffer: ArrayBuffer[String]): Unit = { - val env = - Files.newBufferedReader(path, StandardCharsets.UTF_8) - - try { - buffer += "```bash" - var line = env.readLine() - while (line != null) { - buffer += line - line = env.readLine() - } - buffer += "```" - } finally { - env.close() - } - } + private def loadConfigs = Array( + KyuubiConf, + CtlConf, + HighAvailabilityConf, + JDBCMetadataStoreConf, + MetricsConf, + ZookeeperConf) test("Check all kyuubi configs") { - KyuubiConf - CtlConf - HighAvailabilityConf - JDBCMetadataStoreConf - MetricsConf - ZookeeperConf - - val newOutput = new ArrayBuffer[String]() - newOutput += "" - newOutput += "" - newOutput += "" - newOutput += "" - newOutput += "" - newOutput += "# Introduction to the Kyuubi Configurations System" - newOutput += "" - newOutput += "Kyuubi provides several ways to configure the system and corresponding engines." - newOutput += "" - newOutput += "" - newOutput += "## Environments" - newOutput += "" - newOutput += "" - newOutput += "You can configure the environment variables in" + - " `$KYUUBI_HOME/conf/kyuubi-env.sh`, e.g, `JAVA_HOME`, then this java runtime will be used" + - " both for Kyuubi server instance and the applications it launches. You can also change" + - " the variable in the subprocess's env configuration file, e.g." + - "`$SPARK_HOME/conf/spark-env.sh` to use more specific ENV for SQL engine applications." - - rewriteToConf(Paths.get(kyuubiHome, "conf", "kyuubi-env.sh.template"), newOutput) - - newOutput += "" - newOutput += "For the environment variables that only needed to be transferred into engine" + - " side, you can set it with a Kyuubi configuration item formatted" + - " `kyuubi.engineEnv.VAR_NAME`. For example, with `kyuubi.engineEnv.SPARK_DRIVER_MEMORY=4g`," + - " the environment variable `SPARK_DRIVER_MEMORY` with value `4g` would be transferred into" + - " engine side. With `kyuubi.engineEnv.SPARK_CONF_DIR=/apache/confs/spark/conf`, the" + - " value of `SPARK_CONF_DIR` in engine side is set to `/apache/confs/spark/conf`." - - newOutput += "" - newOutput += "## Kyuubi Configurations" - newOutput += "" - - newOutput += "You can configure the Kyuubi properties in" + - " `$KYUUBI_HOME/conf/kyuubi-defaults.conf`. For example:" - - rewriteToConf(Paths.get(kyuubiHome, "conf", "kyuubi-defaults.conf.template"), newOutput) + loadConfigs + + val builder = MarkdownBuilder(licenced = true, getClass.getName) + + builder + .lines(s""" + |# Introduction to the Kyuubi Configurations System + | + |Kyuubi provides several ways to configure the system and corresponding engines. + | + |## Environments + | + |""") + .line("""You can configure the environment variables in `$KYUUBI_HOME/conf/kyuubi-env.sh`, + | e.g, `JAVA_HOME`, then this java runtime will be used both for Kyuubi server instance and + | the applications it launches. You can also change the variable in the subprocess's env + | configuration file, e.g.`$SPARK_HOME/conf/spark-env.sh` to use more specific ENV for + | SQL engine applications. see `$KYUUBI_HOME/conf/kyuubi-env.sh.template` as an example. + | """) + .line( + """ + | For the environment variables that only needed to be transferred into engine + | side, you can set it with a Kyuubi configuration item formatted + | `kyuubi.engineEnv.VAR_NAME`. For example, with `kyuubi.engineEnv.SPARK_DRIVER_MEMORY=4g`, + | the environment variable `SPARK_DRIVER_MEMORY` with value `4g` would be transferred into + | engine side. With `kyuubi.engineEnv.SPARK_CONF_DIR=/apache/confs/spark/conf`, the + | value of `SPARK_CONF_DIR` on the engine side is set to `/apache/confs/spark/conf`. + | """) + .line("## Kyuubi Configurations") + .line(""" You can configure the Kyuubi properties in + | `$KYUUBI_HOME/conf/kyuubi-defaults.conf`, see + | `$KYUUBI_HOME/conf/kyuubi-defaults.conf.template` as an example. + | """) KyuubiConf.getConfigEntries().asScala - .toSeq + .toStream .filterNot(_.internal) .groupBy(_.key.split("\\.")(1)) .toSeq.sortBy(_._1).foreach { case (category, entries) => - newOutput += "" - newOutput += s"### ${category.capitalize}" - newOutput += "" - - newOutput += "Key | Default | Meaning | Type | Since" - newOutput += "--- | --- | --- | --- | ---" + builder.lines( + s"""### ${category.capitalize} + | Key | Default | Meaning | Type | Since + | --- | --- | --- | --- | --- + |""") entries.sortBy(_.key).foreach { c => val dft = c.defaultValStr.replace("<", "<").replace(">", ">") - val seq = Seq( + builder.line(Seq( s"${c.key}", s"$dft", s"${c.doc}", s"${c.typ}", - s"${c.version}") - newOutput += seq.mkString("|") + s"${c.version}").mkString("|")) } - newOutput += "" } - newOutput += ("## Spark Configurations") - newOutput += "" - - newOutput += ("### Via spark-defaults.conf") - newOutput += "" - - newOutput += ("Setting them in `$SPARK_HOME/conf/spark-defaults.conf`" + - " supplies with default values for SQL engine application. Available properties can be" + - " found at Spark official online documentation for" + - " [Spark Configurations](http://spark.apache.org/docs/latest/configuration.html)") - - newOutput += "" - newOutput += ("### Via kyuubi-defaults.conf") - newOutput += "" - newOutput += ("Setting them in `$KYUUBI_HOME/conf/kyuubi-defaults.conf`" + - " supplies with default values for SQL engine application too. These properties will" + - " override all settings in `$SPARK_HOME/conf/spark-defaults.conf`") - - newOutput += "" - newOutput += ("### Via JDBC Connection URL") - newOutput += "" - newOutput += ("Setting them in the JDBC Connection URL" + - " supplies session-specific for each SQL engine. For example: " + - "```" + - "jdbc:hive2://localhost:10009/default;#" + - "spark.sql.shuffle.partitions=2;spark.executor.memory=5g" + - "```") - newOutput += "" - newOutput += ("- **Runtime SQL Configuration**") - newOutput += "" - newOutput += (" - For [Runtime SQL Configurations](" + - "http://spark.apache.org/docs/latest/configuration.html#runtime-sql-configuration), they" + - " will take affect every time") - newOutput += "" - newOutput += ("- **Static SQL and Spark Core Configuration**") - newOutput += "" - newOutput += (" - For [Static SQL Configurations](" + - "http://spark.apache.org/docs/latest/configuration.html#static-sql-configuration) and" + - " other spark core configs, e.g. `spark.executor.memory`, they will take affect if there" + - " is no existing SQL engine application. Otherwise, they will just be ignored") - newOutput += "" - newOutput += ("### Via SET Syntax") - newOutput += "" - newOutput += ("Please refer to the Spark official online documentation for" + - " [SET Command](http://spark.apache.org/docs/latest/sql-ref-syntax-aux-conf-mgmt-set.html)") - newOutput += "" - - newOutput += ("## Flink Configurations") - newOutput += "" - - newOutput += ("### Via flink-conf.yaml") - newOutput += "" - newOutput += ("Setting them in `$FLINK_HOME/conf/flink-conf.yaml`" + - " supplies with default values for SQL engine application." + - " Available properties can be found at Flink official online documentation for" + - " [Flink Configurations]" + - "(https://nightlies.apache.org/flink/flink-docs-stable/docs/deployment/config/)") - newOutput += "" - - newOutput += ("### Via kyuubi-defaults.conf") - newOutput += "" - newOutput += ("Setting them in `$KYUUBI_HOME/conf/kyuubi-defaults.conf`" + - " supplies with default values for SQL engine application too." + - " You can use properties with the additional prefix `flink.` to override settings in" + - " `$FLINK_HOME/conf/flink-conf.yaml`.") - newOutput += "" - newOutput += ("For example:") - newOutput += ("```") - newOutput += ("flink.parallelism.default 2") - newOutput += ("flink.taskmanager.memory.process.size 5g") - newOutput += ("```") - newOutput += "" - newOutput += ("The below options in `kyuubi-defaults.conf` will set `parallelism.default: 2`" + - " and `taskmanager.memory.process.size: 5g` into flink configurations.") - newOutput += "" - - newOutput += ("### Via JDBC Connection URL") - newOutput += "" - newOutput += "Setting them in the JDBC Connection URL supplies session-specific" + - " for each SQL engine. For example: ```jdbc:hive2://localhost:10009/default;" + - "#parallelism.default=2;taskmanager.memory.process.size=5g```" - newOutput += "" - - newOutput += ("### Via SET Statements") - newOutput += "" - newOutput += ("Please refer to the Flink official online documentation for [SET Statements]" + - "(https://nightlies.apache.org/flink/flink-docs-stable/docs/dev/table/sql/set/)") - newOutput += "" - - newOutput += ("## Logging") - newOutput += "" - newOutput += ("Kyuubi uses [log4j](https://logging.apache.org/log4j/2.x/) for logging." + - " You can configure it using `$KYUUBI_HOME/conf/log4j2.xml`.") - - rewriteToConf(Paths.get(kyuubiHome, "conf", "log4j2.xml.template"), newOutput) - - newOutput += "" - newOutput += ("## Other Configurations") - newOutput += "" - newOutput += ("### Hadoop Configurations") - newOutput += "" - newOutput += ("Specifying `HADOOP_CONF_DIR` to the directory contains hadoop configuration" + - " files or treating them as Spark properties with a `spark.hadoop.` prefix." + - " Please refer to the Spark official online documentation for" + - " [Inheriting Hadoop Cluster Configuration](http://spark.apache.org/docs/latest/" + - "configuration.html#inheriting-hadoop-cluster-configuration)." + - " Also, please refer to the [Apache Hadoop](http://hadoop.apache.org)'s" + - " online documentation for an overview on how to configure Hadoop.") - newOutput += "" - newOutput += ("### Hive Configurations") - newOutput += "" - newOutput += ("These configurations are used for SQL engine application to talk to" + - " Hive MetaStore and could be configured in a `hive-site.xml`." + - " Placed it in `$SPARK_HOME/conf` directory, or treating them as Spark properties with" + - " a `spark.hadoop.` prefix.") - - newOutput += "" - newOutput += ("## User Defaults") - newOutput += "" - newOutput += ("In Kyuubi, we can configure user default settings to meet separate needs." + - " These user defaults override system defaults, but will be overridden by those from" + - " [JDBC Connection URL](#via-jdbc-connection-url) or [Set Command](#via-set-syntax)" + - " if could be. They will take effect when creating the SQL engine application ONLY.") - newOutput += ("User default settings are in the form of `___{username}___.{config key}`." + - " There are three continuous underscores(`_`) at both sides of the `username` and" + - " a dot(`.`) that separates the config key and the prefix. For example:") - newOutput += ("```bash") - newOutput += ("# For system defaults") - newOutput += ("spark.master=local") - newOutput += ("spark.sql.adaptive.enabled=true") - newOutput += ("# For a user named kent") - newOutput += ("___kent___.spark.master=yarn") - newOutput += ("___kent___.spark.sql.adaptive.enabled=false") - newOutput += ("# For a user named bob") - newOutput += ("___bob___.spark.master=spark://master:7077") - newOutput += ("___bob___.spark.executor.memory=8g") - newOutput += ("```") - newOutput += "" - newOutput += "In the above case, if there are related configurations from" + - " [JDBC Connection URL](#via-jdbc-connection-url), `kent` will run his SQL engine" + - " application on YARN and prefer the Spark AQE to be off, while `bob` will activate" + - " his SQL engine application on a Spark standalone cluster with 8g heap memory for each" + - " executor and obey the Spark AQE behavior of Kyuubi system default. On the other hand," + - " for those users who do not have custom configurations will use system defaults." - - TestUtils.verifyOutput(markdown, newOutput, getClass.getCanonicalName) + builder + .lines(""" + |## Spark Configurations + |### Via spark-defaults.conf + |""") + .line(""" + | Setting them in `$SPARK_HOME/conf/spark-defaults.conf` + | supplies with default values for SQL engine application. Available properties can be + | found at Spark official online documentation for + | [Spark Configurations](https://spark.apache.org/docs/latest/configuration.html) + | """) + .line("### Via kyuubi-defaults.conf") + .line(""" + | Setting them in `$KYUUBI_HOME/conf/kyuubi-defaults.conf` + | supplies with default values for SQL engine application too. These properties will + | override all settings in `$SPARK_HOME/conf/spark-defaults.conf`""") + .line("### Via JDBC Connection URL") + .line(""" + | Setting them in the JDBC Connection URL + | supplies session-specific for each SQL engine. For example: + | ``` + |jdbc:hive2://localhost:10009/default;# + |spark.sql.shuffle.partitions=2;spark.executor.memory=5g + |```""") + .line() + .line("- **Runtime SQL Configuration**") + .line(""" - For [Runtime SQL Configurations]( + |https://spark.apache.org/docs/latest/configuration.html#runtime-sql-configuration), they + | will take affect every time""") + .line("- **Static SQL and Spark Core Configuration**") + .line(""" - For [Static SQL Configurations]( + |https://spark.apache.org/docs/latest/configuration.html#static-sql-configuration) and + | other spark core configs, e.g. `spark.executor.memory`, they will take effect if there + | is no existing SQL engine application. Otherwise, they will just be ignored""") + .line("### Via SET Syntax") + .line("""Please refer to the Spark official online documentation for + | [SET Command](https://spark.apache.org/docs/latest/sql-ref-syntax-aux-conf-mgmt-set.html) + |""") + + builder + .lines(""" + |## Flink Configurations + |### Via flink-conf.yaml""") + .line("""Setting them in `$FLINK_HOME/conf/flink-conf.yaml` + | supplies with default values for SQL engine application. + | Available properties can be found at Flink official online documentation for + | [Flink Configurations] + |(https://nightlies.apache.org/flink/flink-docs-stable/docs/deployment/config/)""") + .line("### Via kyuubi-defaults.conf") + .line("""Setting them in `$KYUUBI_HOME/conf/kyuubi-defaults.conf` + | supplies with default values for SQL engine application too. + | You can use properties with the additional prefix `flink.` to override settings in + | `$FLINK_HOME/conf/flink-conf.yaml`.""") + .lines(""" + | + |For example: + |``` + |flink.parallelism.default 2 + |flink.taskmanager.memory.process.size 5g + |```""") + .line("""The below options in `kyuubi-defaults.conf` will set `parallelism.default: 2` + | and `taskmanager.memory.process.size: 5g` into flink configurations.""") + .line("### Via JDBC Connection URL") + .line("""Setting them in the JDBC Connection URL supplies session-specific + | for each SQL engine. For example: ```jdbc:hive2://localhost:10009/default; + |#parallelism.default=2;taskmanager.memory.process.size=5g``` + |""") + .line("### Via SET Statements") + .line("""Please refer to the Flink official online documentation for [SET Statements] + |(https://nightlies.apache.org/flink/flink-docs-stable/docs/dev/table/sql/set/)""") + + builder + .line("## Logging") + .line("""Kyuubi uses [log4j](https://logging.apache.org/log4j/2.x/) for logging. + | You can configure it using `$KYUUBI_HOME/conf/log4j2.xml`, see + | `$KYUUBI_HOME/conf/log4j2.xml.template` as an example. + | """) + + builder + .lines(""" + |## Other Configurations + |### Hadoop Configurations + |""") + .line("""Specifying `HADOOP_CONF_DIR` to the directory containing Hadoop configuration + | files or treating them as Spark properties with a `spark.hadoop.` prefix. + | Please refer to the Spark official online documentation for + | [Inheriting Hadoop Cluster Configuration](https://spark.apache.org/docs/latest/ + |configuration.html#inheriting-hadoop-cluster-configuration). + | Also, please refer to the [Apache Hadoop](https://hadoop.apache.org)'s + | online documentation for an overview on how to configure Hadoop.""") + .line("### Hive Configurations") + .line("""These configurations are used for SQL engine application to talk to + | Hive MetaStore and could be configured in a `hive-site.xml`. + | Placed it in `$SPARK_HOME/conf` directory, or treat them as Spark properties with + | a `spark.hadoop.` prefix.""") + + builder + .line("## User Defaults") + .line("""In Kyuubi, we can configure user default settings to meet separate needs. + | These user defaults override system defaults, but will be overridden by those from + | [JDBC Connection URL](#via-jdbc-connection-url) or [Set Command](#via-set-syntax) + | if could be. They will take effect when creating the SQL engine application ONLY.""") + .line("""User default settings are in the form of `___{username}___.{config key}`. + | There are three continuous underscores(`_`) at both sides of the `username` and + | a dot(`.`) that separates the config key and the prefix. For example:""") + .lines(""" + |```bash + |# For system defaults + |spark.master=local + |spark.sql.adaptive.enabled=true + |# For a user named kent + |___kent___.spark.master=yarn + |___kent___.spark.sql.adaptive.enabled=false + |# For a user named bob + |___bob___.spark.master=spark://master:7077 + |___bob___.spark.executor.memory=8g + |``` + | + |""") + .line("""In the above case, if there are related configurations from + | [JDBC Connection URL](#via-jdbc-connection-url), `kent` will run his SQL engine + | application on YARN and prefer the Spark AQE to be off, while `bob` will activate + | his SQL engine application on a Spark standalone cluster with 8g heap memory for each + | executor and obey the Spark AQE behavior of Kyuubi system default. On the other hand, + | for those users who do not have custom configurations will use system defaults.""") + + MarkdownUtils.verifyOutput(markdown, builder, getClass.getCanonicalName, "kyuubi-server") } } diff --git a/kyuubi-server/src/test/scala/org/apache/kyuubi/engine/EngineRefTests.scala b/kyuubi-server/src/test/scala/org/apache/kyuubi/engine/EngineRefTests.scala index 5d8ae3177f5..8b050684a59 100644 --- a/kyuubi-server/src/test/scala/org/apache/kyuubi/engine/EngineRefTests.scala +++ b/kyuubi-server/src/test/scala/org/apache/kyuubi/engine/EngineRefTests.scala @@ -20,6 +20,8 @@ package org.apache.kyuubi.engine import java.util.UUID import java.util.concurrent.Executors +import scala.collection.JavaConverters._ + import org.scalatest.time.SpanSugar.convertIntToGrainOfTime import org.apache.kyuubi.{KYUUBI_VERSION, Utils} @@ -33,6 +35,7 @@ import org.apache.kyuubi.ha.client.DiscoveryClientProvider import org.apache.kyuubi.ha.client.DiscoveryPaths import org.apache.kyuubi.metrics.MetricsConstants.ENGINE_TOTAL import org.apache.kyuubi.metrics.MetricsSystem +import org.apache.kyuubi.plugin.PluginLoader import org.apache.kyuubi.util.NamedThreadFactory trait EngineRefTests extends KyuubiFunSuite { @@ -68,7 +71,9 @@ trait EngineRefTests extends KyuubiFunSuite { Seq(None, Some("suffix")).foreach { domain => conf.set(KyuubiConf.ENGINE_SHARE_LEVEL, CONNECTION.toString) domain.foreach(conf.set(KyuubiConf.ENGINE_SHARE_LEVEL_SUBDOMAIN.key, _)) - val engine = new EngineRef(conf, user, "grp", id, null) + conf.set(KyuubiConf.GROUP_PROVIDER, "hadoop") + + val engine = new EngineRef(conf, user, PluginLoader.loadGroupProvider(conf), id, null) assert(engine.engineSpace === DiscoveryPaths.makePath( s"kyuubi_${KYUUBI_VERSION}_${CONNECTION}_${engineType}", @@ -82,7 +87,9 @@ trait EngineRefTests extends KyuubiFunSuite { val id = UUID.randomUUID().toString conf.set(KyuubiConf.ENGINE_SHARE_LEVEL, USER.toString) conf.set(KyuubiConf.ENGINE_TYPE, FLINK_SQL.toString) - val appName = new EngineRef(conf, user, "grp", id, null) + conf.set(KyuubiConf.GROUP_PROVIDER, "hadoop") + + val appName = new EngineRef(conf, user, PluginLoader.loadGroupProvider(conf), id, null) assert(appName.engineSpace === DiscoveryPaths.makePath( s"kyuubi_${KYUUBI_VERSION}_${USER}_$FLINK_SQL", @@ -94,7 +101,7 @@ trait EngineRefTests extends KyuubiFunSuite { k => conf.unset(KyuubiConf.ENGINE_SHARE_LEVEL_SUBDOMAIN) conf.set(k.key, "abc") - val appName2 = new EngineRef(conf, user, "grp", id, null) + val appName2 = new EngineRef(conf, user, PluginLoader.loadGroupProvider(conf), id, null) assert(appName2.engineSpace === DiscoveryPaths.makePath( s"kyuubi_${KYUUBI_VERSION}_${USER}_${FLINK_SQL}", @@ -108,8 +115,12 @@ trait EngineRefTests extends KyuubiFunSuite { val id = UUID.randomUUID().toString conf.set(KyuubiConf.ENGINE_SHARE_LEVEL, GROUP.toString) conf.set(KyuubiConf.ENGINE_TYPE, SPARK_SQL.toString) - val primaryGroupName = "primary_grp" - val engineRef = new EngineRef(conf, user, primaryGroupName, id, null) + conf.set(KyuubiConf.GROUP_PROVIDER, "hadoop") + + val primaryGroupName = + PluginLoader.loadGroupProvider(conf).primaryGroup(user, Map.empty[String, String].asJava) + + val engineRef = new EngineRef(conf, user, PluginLoader.loadGroupProvider(conf), id, null) assert(engineRef.engineSpace === DiscoveryPaths.makePath( s"kyuubi_${KYUUBI_VERSION}_GROUP_SPARK_SQL", @@ -122,7 +133,7 @@ trait EngineRefTests extends KyuubiFunSuite { k => conf.unset(k) conf.set(k.key, "abc") - val engineRef2 = new EngineRef(conf, user, primaryGroupName, id, null) + val engineRef2 = new EngineRef(conf, user, PluginLoader.loadGroupProvider(conf), id, null) assert(engineRef2.engineSpace === DiscoveryPaths.makePath( s"kyuubi_${KYUUBI_VERSION}_${GROUP}_${SPARK_SQL}", @@ -137,7 +148,9 @@ trait EngineRefTests extends KyuubiFunSuite { val id = UUID.randomUUID().toString conf.set(KyuubiConf.ENGINE_SHARE_LEVEL, SERVER.toString) conf.set(KyuubiConf.ENGINE_TYPE, FLINK_SQL.toString) - val appName = new EngineRef(conf, user, "grp", id, null) + conf.set(KyuubiConf.GROUP_PROVIDER, "hadoop") + + val appName = new EngineRef(conf, user, PluginLoader.loadGroupProvider(conf), id, null) assert(appName.engineSpace === DiscoveryPaths.makePath( s"kyuubi_${KYUUBI_VERSION}_${SERVER}_${FLINK_SQL}", @@ -146,7 +159,7 @@ trait EngineRefTests extends KyuubiFunSuite { assert(appName.defaultEngineName === s"kyuubi_${SERVER}_${FLINK_SQL}_${user}_default_$id") conf.set(KyuubiConf.ENGINE_SHARE_LEVEL_SUBDOMAIN.key, "abc") - val appName2 = new EngineRef(conf, user, "grp", id, null) + val appName2 = new EngineRef(conf, user, PluginLoader.loadGroupProvider(conf), id, null) assert(appName2.engineSpace === DiscoveryPaths.makePath( s"kyuubi_${KYUUBI_VERSION}_${SERVER}_${FLINK_SQL}", @@ -161,31 +174,33 @@ trait EngineRefTests extends KyuubiFunSuite { // set subdomain and disable engine pool conf.set(ENGINE_SHARE_LEVEL_SUBDOMAIN.key, "abc") conf.set(ENGINE_POOL_SIZE, -1) - val engine1 = new EngineRef(conf, user, "grp", id, null) + conf.set(KyuubiConf.GROUP_PROVIDER, "hadoop") + + val engine1 = new EngineRef(conf, user, PluginLoader.loadGroupProvider(conf), id, null) assert(engine1.subdomain === "abc") // unset subdomain and disable engine pool conf.unset(ENGINE_SHARE_LEVEL_SUBDOMAIN) conf.set(ENGINE_POOL_SIZE, -1) - val engine2 = new EngineRef(conf, user, "grp", id, null) + val engine2 = new EngineRef(conf, user, PluginLoader.loadGroupProvider(conf), id, null) assert(engine2.subdomain === "default") // set subdomain and 1 <= engine pool size < threshold conf.set(ENGINE_SHARE_LEVEL_SUBDOMAIN.key, "abc") conf.set(ENGINE_POOL_SIZE, 1) - val engine3 = new EngineRef(conf, user, "grp", id, null) + val engine3 = new EngineRef(conf, user, PluginLoader.loadGroupProvider(conf), id, null) assert(engine3.subdomain === "abc") // unset subdomain and 1 <= engine pool size < threshold conf.unset(ENGINE_SHARE_LEVEL_SUBDOMAIN) conf.set(ENGINE_POOL_SIZE, 3) - val engine4 = new EngineRef(conf, user, "grp", id, null) + val engine4 = new EngineRef(conf, user, PluginLoader.loadGroupProvider(conf), id, null) assert(engine4.subdomain.startsWith("engine-pool-")) // unset subdomain and engine pool size > threshold conf.unset(ENGINE_SHARE_LEVEL_SUBDOMAIN) conf.set(ENGINE_POOL_SIZE, 100) - val engine5 = new EngineRef(conf, user, "grp", id, null) + val engine5 = new EngineRef(conf, user, PluginLoader.loadGroupProvider(conf), id, null) val engineNumber = Integer.parseInt(engine5.subdomain.substring(12)) val threshold = ENGINE_POOL_SIZE_THRESHOLD.defaultVal.get assert(engineNumber <= threshold) @@ -195,7 +210,7 @@ trait EngineRefTests extends KyuubiFunSuite { val enginePoolName = "test-pool" conf.set(ENGINE_POOL_NAME, enginePoolName) conf.set(ENGINE_POOL_SIZE, 3) - val engine6 = new EngineRef(conf, user, "grp", id, null) + val engine6 = new EngineRef(conf, user, PluginLoader.loadGroupProvider(conf), id, null) assert(engine6.subdomain.startsWith(s"$enginePoolName-")) conf.unset(ENGINE_SHARE_LEVEL_SUBDOMAIN) @@ -204,9 +219,9 @@ trait EngineRefTests extends KyuubiFunSuite { conf.set(ENGINE_POOL_NAME, pool_name) conf.set(HighAvailabilityConf.HA_NAMESPACE, "engine_test") conf.set(HighAvailabilityConf.HA_ADDRESSES, getConnectString()) - conf.set(ENGINE_POOL_BALANCE_POLICY, "POLLING") + conf.set(ENGINE_POOL_SELECT_POLICY, "POLLING") (0 until (10)).foreach { i => - val engine7 = new EngineRef(conf, user, "grp", id, null) + val engine7 = new EngineRef(conf, user, PluginLoader.loadGroupProvider(conf), id, null) val engineNumber = Integer.parseInt(engine7.subdomain.substring(pool_name.length + 1)) assert(engineNumber == (i % conf.get(ENGINE_POOL_SIZE))) } @@ -219,7 +234,9 @@ trait EngineRefTests extends KyuubiFunSuite { conf.set(KyuubiConf.FRONTEND_THRIFT_BINARY_BIND_PORT, 0) conf.set(HighAvailabilityConf.HA_NAMESPACE, "engine_test") conf.set(HighAvailabilityConf.HA_ADDRESSES, getConnectString()) - val engine = new EngineRef(conf, user, id, "grp", null) + conf.set(KyuubiConf.GROUP_PROVIDER, "hadoop") + + val engine = new EngineRef(conf, user, PluginLoader.loadGroupProvider(conf), id, null) var port1 = 0 var port2 = 0 @@ -261,6 +278,7 @@ trait EngineRefTests extends KyuubiFunSuite { conf.set(KyuubiConf.ENGINE_INIT_TIMEOUT, 3000L) conf.set(HighAvailabilityConf.HA_NAMESPACE, "engine_test2") conf.set(HighAvailabilityConf.HA_ADDRESSES, getConnectString()) + conf.set(KyuubiConf.GROUP_PROVIDER, "hadoop") val beforeEngines = MetricsSystem.counterValue(ENGINE_TOTAL).getOrElse(0L) val start = System.currentTimeMillis() @@ -272,7 +290,12 @@ trait EngineRefTests extends KyuubiFunSuite { executor.execute(() => { DiscoveryClientProvider.withDiscoveryClient(cloned) { client => try { - new EngineRef(cloned, user, "grp", id, null).getOrCreate(client) + new EngineRef( + cloned, + user, + PluginLoader.loadGroupProvider(conf), + id, + null).getOrCreate(client) } finally { times(i) = System.currentTimeMillis() } @@ -300,20 +323,22 @@ trait EngineRefTests extends KyuubiFunSuite { conf.set(ENGINE_SHARE_LEVEL_SUBDOMAIN.key, "abc") conf.set(ENGINE_POOL_IGNORE_SUBDOMAIN, false) conf.set(ENGINE_POOL_SIZE, -1) - val engine1 = new EngineRef(conf, user, "grp", id, null) + conf.set(KyuubiConf.GROUP_PROVIDER, "hadoop") + + val engine1 = new EngineRef(conf, user, PluginLoader.loadGroupProvider(conf), id, null) assert(engine1.subdomain === "abc") conf.set(ENGINE_POOL_SIZE, 1) - val engine2 = new EngineRef(conf, user, "grp", id, null) + val engine2 = new EngineRef(conf, user, PluginLoader.loadGroupProvider(conf), id, null) assert(engine2.subdomain === "abc") conf.unset(ENGINE_SHARE_LEVEL_SUBDOMAIN) - val engine3 = new EngineRef(conf, user, "grp", id, null) + val engine3 = new EngineRef(conf, user, PluginLoader.loadGroupProvider(conf), id, null) assert(engine3.subdomain.startsWith("engine-pool-")) conf.set(ENGINE_SHARE_LEVEL_SUBDOMAIN.key, "abc") conf.set(ENGINE_POOL_IGNORE_SUBDOMAIN, true) - val engine4 = new EngineRef(conf, user, "grp", id, null) + val engine4 = new EngineRef(conf, user, PluginLoader.loadGroupProvider(conf), id, null) assert(engine4.subdomain.startsWith("engine-pool-")) } } diff --git a/kyuubi-server/src/test/scala/org/apache/kyuubi/engine/EngineRefWithZookeeperSuite.scala b/kyuubi-server/src/test/scala/org/apache/kyuubi/engine/EngineRefWithZookeeperSuite.scala index 8695e13c414..40fc818706c 100644 --- a/kyuubi-server/src/test/scala/org/apache/kyuubi/engine/EngineRefWithZookeeperSuite.scala +++ b/kyuubi-server/src/test/scala/org/apache/kyuubi/engine/EngineRefWithZookeeperSuite.scala @@ -29,6 +29,7 @@ import org.apache.kyuubi.engine.EngineType.SPARK_SQL import org.apache.kyuubi.engine.ShareLevel.USER import org.apache.kyuubi.ha.HighAvailabilityConf import org.apache.kyuubi.ha.client.DiscoveryClientProvider +import org.apache.kyuubi.plugin.PluginLoader import org.apache.kyuubi.zookeeper.EmbeddedZookeeper import org.apache.kyuubi.zookeeper.ZookeeperConf @@ -62,6 +63,8 @@ class EngineRefWithZookeeperSuite extends EngineRefTests { conf.set(KyuubiConf.ENGINE_INIT_TIMEOUT, 3000L) conf.set(HighAvailabilityConf.HA_NAMESPACE, "engine_test1") conf.set(HighAvailabilityConf.HA_ADDRESSES, getConnectString()) + conf.set(KyuubiConf.GROUP_PROVIDER, "hadoop") + val conf1 = conf.clone conf1.set(KyuubiConf.ENGINE_TYPE, SPARK_SQL.toString) val conf2 = conf.clone @@ -74,7 +77,12 @@ class EngineRefWithZookeeperSuite extends EngineRefTests { executor.execute(() => { DiscoveryClientProvider.withDiscoveryClient(conf1) { client => try { - new EngineRef(conf1, user, "grp", UUID.randomUUID().toString, null) + new EngineRef( + conf1, + user, + PluginLoader.loadGroupProvider(conf), + UUID.randomUUID().toString, + null) .getOrCreate(client) } finally { times(0) = System.currentTimeMillis() @@ -84,7 +92,12 @@ class EngineRefWithZookeeperSuite extends EngineRefTests { executor.execute(() => { DiscoveryClientProvider.withDiscoveryClient(conf2) { client => try { - new EngineRef(conf2, user, "grp", UUID.randomUUID().toString, null) + new EngineRef( + conf2, + user, + PluginLoader.loadGroupProvider(conf), + UUID.randomUUID().toString, + null) .getOrCreate(client) } finally { times(1) = System.currentTimeMillis() diff --git a/externals/kyuubi-spark-sql-engine/src/test/scala/org/apache/kyuubi/engine/spark/operation/PySparkTests.scala b/kyuubi-server/src/test/scala/org/apache/kyuubi/engine/spark/PySparkTests.scala similarity index 96% rename from externals/kyuubi-spark-sql-engine/src/test/scala/org/apache/kyuubi/engine/spark/operation/PySparkTests.scala rename to kyuubi-server/src/test/scala/org/apache/kyuubi/engine/spark/PySparkTests.scala index e2dd2609d8d..6af7e21e25d 100644 --- a/externals/kyuubi-spark-sql-engine/src/test/scala/org/apache/kyuubi/engine/spark/operation/PySparkTests.scala +++ b/kyuubi-server/src/test/scala/org/apache/kyuubi/engine/spark/PySparkTests.scala @@ -24,17 +24,19 @@ import java.util.Properties import scala.sys.process._ -import org.apache.kyuubi.engine.spark.WithSparkSQLEngine +import org.apache.kyuubi.WithKyuubiServer +import org.apache.kyuubi.config.KyuubiConf import org.apache.kyuubi.jdbc.KyuubiHiveDriver import org.apache.kyuubi.jdbc.hive.{KyuubiSQLException, KyuubiStatement} import org.apache.kyuubi.operation.HiveJDBCTestHelper import org.apache.kyuubi.tags.PySparkTest @PySparkTest -class PySparkTests extends WithSparkSQLEngine with HiveJDBCTestHelper { +class PySparkTests extends WithKyuubiServer with HiveJDBCTestHelper { override protected def jdbcUrl: String = getJdbcUrl - override def withKyuubiConf: Map[String, String] = Map.empty + + override protected val conf: KyuubiConf = new KyuubiConf test("pyspark support") { val code = "print(1)" diff --git a/kyuubi-server/src/test/scala/org/apache/kyuubi/engine/spark/SparkSqlEngineSuite.scala b/kyuubi-server/src/test/scala/org/apache/kyuubi/engine/spark/SparkSqlEngineSuite.scala index 1e35d2f1dc8..9ab627413d3 100644 --- a/kyuubi-server/src/test/scala/org/apache/kyuubi/engine/spark/SparkSqlEngineSuite.scala +++ b/kyuubi-server/src/test/scala/org/apache/kyuubi/engine/spark/SparkSqlEngineSuite.scala @@ -139,13 +139,13 @@ class SparkSqlEngineSuite extends WithKyuubiServer with HiveJDBCTestHelper { val utcResultSet = statement.executeQuery("select from_utc_timestamp(from_unixtime(" + "1670404535000/1000,'yyyy-MM-dd HH:mm:ss'),'GMT+08:00')") assert(utcResultSet.next()) - assert(utcResultSet.getString(1) == "2022-12-07 17:15:35.0") + assert(utcResultSet.getString(1) === "2022-12-07 17:15:35.0") val setGMT8ResultSet = statement.executeQuery("set spark.sql.session.timeZone=GMT+8") assert(setGMT8ResultSet.next()) val gmt8ResultSet = statement.executeQuery("select from_utc_timestamp(from_unixtime(" + "1670404535000/1000,'yyyy-MM-dd HH:mm:ss'),'GMT+08:00')") assert(gmt8ResultSet.next()) - assert(gmt8ResultSet.getString(1) == "2022-12-08 01:15:35.0") + assert(gmt8ResultSet.getString(1) === "2022-12-08 01:15:35.0") } } diff --git a/kyuubi-server/src/test/scala/org/apache/kyuubi/events/handler/ServerJsonLoggingEventHandlerSuite.scala b/kyuubi-server/src/test/scala/org/apache/kyuubi/events/handler/ServerJsonLoggingEventHandlerSuite.scala index d0c9924dc4d..3bdc9cd3808 100644 --- a/kyuubi-server/src/test/scala/org/apache/kyuubi/events/handler/ServerJsonLoggingEventHandlerSuite.scala +++ b/kyuubi-server/src/test/scala/org/apache/kyuubi/events/handler/ServerJsonLoggingEventHandlerSuite.scala @@ -28,8 +28,10 @@ import com.fasterxml.jackson.databind.ObjectMapper import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.hive.service.rpc.thrift.{TOpenSessionReq, TStatusCode} +import org.scalatest.time.SpanSugar.convertIntToGrainOfTime import org.apache.kyuubi._ +import org.apache.kyuubi.client.util.BatchUtils._ import org.apache.kyuubi.config.KyuubiConf import org.apache.kyuubi.operation.HiveJDBCTestHelper import org.apache.kyuubi.operation.OperationState._ @@ -138,7 +140,7 @@ class ServerJsonLoggingEventHandlerSuite extends WithKyuubiServer with HiveJDBCT Utils.currentUser, "kyuubi", "127.0.0.1", - Map.empty, + Map(KYUUBI_BATCH_ID_KEY -> UUID.randomUUID().toString), batchRequest) withSessionConf()(Map.empty)(Map("spark.sql.shuffle.partitions" -> "2")) { withJdbcStatement() { statement => @@ -157,7 +159,7 @@ class ServerJsonLoggingEventHandlerSuite extends WithKyuubiServer with HiveJDBCT } } - test("engine session id is not same with server session id") { + test("engine session id is same with server session id") { val name = UUID.randomUUID().toString withSessionConf()(Map.empty)(Map(KyuubiConf.SESSION_NAME.key -> name)) { withJdbcStatement() { statement => @@ -181,7 +183,7 @@ class ServerJsonLoggingEventHandlerSuite extends WithKyuubiServer with HiveJDBCT val res2 = statement.executeQuery( s"SELECT * FROM `json`.`$engineSessionEventPath` " + s"where sessionId = '$serverSessionId' limit 1") - assert(!res2.next()) + assert(res2.next()) } } } @@ -277,15 +279,17 @@ class ServerJsonLoggingEventHandlerSuite extends WithKyuubiServer with HiveJDBCT } } - val serverSessionEventPath = - Paths.get(serverLogRoot, "kyuubi_session", s"day=$currentDate") - withJdbcStatement() { statement => - val res = statement.executeQuery( - s"SELECT * FROM `json`.`$serverSessionEventPath` " + - s"where sessionName = '$name' and exception is not null limit 1") - assert(res.next()) - val exception = res.getObject("exception") - assert(exception.toString.contains("Invalid maximum heap size: -Xmxabc")) + eventually(timeout(2.minutes), interval(10.seconds)) { + val serverSessionEventPath = + Paths.get(serverLogRoot, "kyuubi_session", s"day=$currentDate") + withJdbcStatement() { statement => + val res = statement.executeQuery( + s"SELECT * FROM `json`.`$serverSessionEventPath` " + + s"where sessionName = '$name' and exception is not null limit 1") + assert(res.next()) + val exception = res.getObject("exception") + assert(exception.toString.contains("Invalid maximum heap size: -Xmxabc")) + } } } } diff --git a/kyuubi-server/src/test/scala/org/apache/kyuubi/operation/KyuubiOperationKerberosAndPlainAuthSuite.scala b/kyuubi-server/src/test/scala/org/apache/kyuubi/operation/KyuubiOperationKerberosAndPlainAuthSuite.scala index fc8e1ec70fc..31cde639734 100644 --- a/kyuubi-server/src/test/scala/org/apache/kyuubi/operation/KyuubiOperationKerberosAndPlainAuthSuite.scala +++ b/kyuubi-server/src/test/scala/org/apache/kyuubi/operation/KyuubiOperationKerberosAndPlainAuthSuite.scala @@ -63,11 +63,12 @@ class KyuubiOperationKerberosAndPlainAuthSuite extends WithKyuubiServer with Ker UserGroupInformation.setConfiguration(config) assert(UserGroupInformation.isSecurityEnabled) - KyuubiConf().set(KyuubiConf.AUTHENTICATION_METHOD, Seq("KERBEROS", "LDAP", "CUSTOM")) + KyuubiConf() + .set(KyuubiConf.AUTHENTICATION_METHOD, Seq("KERBEROS", "LDAP", "CUSTOM")) .set(KyuubiConf.SERVER_KEYTAB, testKeytab) .set(KyuubiConf.SERVER_PRINCIPAL, testPrincipal) .set(KyuubiConf.AUTHENTICATION_LDAP_URL, ldapUrl) - .set(KyuubiConf.AUTHENTICATION_LDAP_BASEDN, ldapBaseDn) + .set(KyuubiConf.AUTHENTICATION_LDAP_BASE_DN, ldapBaseDn.head) .set( KyuubiConf.AUTHENTICATION_CUSTOM_CLASS, classOf[UserDefineAuthenticationProviderImpl].getCanonicalName) diff --git a/kyuubi-server/src/test/scala/org/apache/kyuubi/operation/KyuubiOperationPerConnectionSuite.scala b/kyuubi-server/src/test/scala/org/apache/kyuubi/operation/KyuubiOperationPerConnectionSuite.scala index 4c4faf63bbf..d04afbfb580 100644 --- a/kyuubi-server/src/test/scala/org/apache/kyuubi/operation/KyuubiOperationPerConnectionSuite.scala +++ b/kyuubi-server/src/test/scala/org/apache/kyuubi/operation/KyuubiOperationPerConnectionSuite.scala @@ -26,14 +26,15 @@ import scala.collection.JavaConverters._ import org.apache.hive.service.rpc.thrift._ import org.scalatest.time.SpanSugar.convertIntToGrainOfTime -import org.apache.kyuubi.WithKyuubiServer -import org.apache.kyuubi.config.KyuubiConf +import org.apache.kyuubi.{KYUUBI_VERSION, WithKyuubiServer} +import org.apache.kyuubi.config.{KyuubiConf, KyuubiReservedKeys} import org.apache.kyuubi.config.KyuubiConf.SESSION_CONF_ADVISOR import org.apache.kyuubi.engine.ApplicationState import org.apache.kyuubi.jdbc.KyuubiHiveDriver import org.apache.kyuubi.jdbc.hive.KyuubiConnection +import org.apache.kyuubi.metrics.{MetricsConstants, MetricsSystem} import org.apache.kyuubi.plugin.SessionConfAdvisor -import org.apache.kyuubi.session.KyuubiSessionManager +import org.apache.kyuubi.session.{KyuubiSessionManager, SessionType} /** * UT with Connection level engine shared cost much time, only run basic jdbc tests. @@ -136,6 +137,7 @@ class KyuubiOperationPerConnectionSuite extends WithKyuubiServer with HiveJDBCTe assert(connection.getEngineId.startsWith("local-")) assert(connection.getEngineName.startsWith("kyuubi")) assert(connection.getEngineUrl.nonEmpty) + assert(connection.getEngineRefId.nonEmpty) val stmt = connection.createStatement() try { stmt.execute("select engine_name()") @@ -239,6 +241,46 @@ class KyuubiOperationPerConnectionSuite extends WithKyuubiServer with HiveJDBCTe } } } + + test("trace the connection metrics with session type") { + val connOpenMetric = s"${MetricsConstants.CONN_OPEN}.${SessionType.INTERACTIVE}" + val connTotalMetric = s"${MetricsConstants.CONN_TOTAL}.${SessionType.INTERACTIVE}" + val connFailedMetric = s"${MetricsConstants.CONN_FAIL}.${SessionType.INTERACTIVE}" + val connTotalCount = MetricsSystem.counterValue(connTotalMetric).getOrElse(0L) + val connFailedCount = MetricsSystem.counterValue(connFailedMetric).getOrElse(0L) + + withJdbcStatement() { statement => + statement.executeQuery("select engine_name()") + } + eventually(timeout(5.seconds), interval(100.milliseconds)) { + assert(MetricsSystem.counterValue(connTotalMetric).getOrElse(0L) > connTotalCount) + assert(MetricsSystem.counterValue(connOpenMetric).getOrElse(0L) === 0) + } + + withSessionConf(Map.empty)(Map.empty)(Map( + KyuubiConf.SESSION_ENGINE_LAUNCH_ASYNC.key -> "false", + "spark.master" -> "invalid")) { + intercept[Exception] { + withJdbcStatement() { statement => + statement.executeQuery("select engine_name()") + } + } + } + + eventually(timeout(5.seconds), interval(100.milliseconds)) { + assert(MetricsSystem.counterValue(connTotalMetric).getOrElse(0L) - connTotalCount > 1) + assert(MetricsSystem.counterValue(connOpenMetric).getOrElse(0L) === 0) + assert(MetricsSystem.counterValue(connFailedMetric).getOrElse(0L) > connFailedCount) + } + } + + test("support to transfer client version when opening jdbc connection") { + withJdbcStatement() { stmt => + val rs = stmt.executeQuery(s"set spark.${KyuubiReservedKeys.KYUUBI_CLIENT_VERSION_KEY}") + assert(rs.next()) + assert(rs.getString(2) === KYUUBI_VERSION) + } + } } class TestSessionConfAdvisor extends SessionConfAdvisor { diff --git a/kyuubi-server/src/test/scala/org/apache/kyuubi/operation/KyuubiOperationPerUserSuite.scala b/kyuubi-server/src/test/scala/org/apache/kyuubi/operation/KyuubiOperationPerUserSuite.scala index 9ed72307977..21bf56b4fb4 100644 --- a/kyuubi-server/src/test/scala/org/apache/kyuubi/operation/KyuubiOperationPerUserSuite.scala +++ b/kyuubi-server/src/test/scala/org/apache/kyuubi/operation/KyuubiOperationPerUserSuite.scala @@ -28,6 +28,7 @@ import org.apache.kyuubi.config.KyuubiConf import org.apache.kyuubi.config.KyuubiConf.KYUUBI_ENGINE_ENV_PREFIX import org.apache.kyuubi.engine.SemanticVersion import org.apache.kyuubi.jdbc.hive.KyuubiStatement +import org.apache.kyuubi.metrics.{MetricsConstants, MetricsSystem} import org.apache.kyuubi.session.{KyuubiSessionImpl, KyuubiSessionManager, SessionHandle} import org.apache.kyuubi.zookeeper.ZookeeperConf @@ -165,23 +166,6 @@ class KyuubiOperationPerUserSuite assert(r1 !== r2) } - test("test engine spark result max rows") { - withSessionConf()(Map.empty)(Map(KyuubiConf.OPERATION_RESULT_MAX_ROWS.key -> "1")) { - withJdbcStatement("va") { statement => - statement.executeQuery("create temporary view va as select * from values(1),(2)") - - val resultLimit1 = statement.executeQuery("select * from va") - assert(resultLimit1.next()) - assert(!resultLimit1.next()) - - statement.executeQuery(s"set ${KyuubiConf.OPERATION_RESULT_MAX_ROWS.key}=0") - val resultUnLimit = statement.executeQuery("select * from va") - assert(resultUnLimit.next()) - assert(resultUnLimit.next()) - } - } - } - test("support to interrupt the thrift request if remote engine is broken") { assume(!httpMode) withSessionConf(Map( @@ -216,9 +200,11 @@ class KyuubiOperationPerUserSuite val executeStmtResp = client.ExecuteStatement(executeStmtReq) assert(executeStmtResp.getStatus.getStatusCode === TStatusCode.ERROR_STATUS) assert(executeStmtResp.getStatus.getErrorMessage.contains( - "java.net.SocketException: Connection reset") || + "java.net.SocketException") || + executeStmtResp.getStatus.getErrorMessage.contains( + "org.apache.thrift.transport.TTransportException") || executeStmtResp.getStatus.getErrorMessage.contains( - "Caused by: java.net.SocketException: Broken pipe (Write failed)")) + "connection does not exist")) val elapsedTime = System.currentTimeMillis() - startTime assert(elapsedTime < 20 * 1000) assert(session.client.asyncRequestInterrupted) @@ -226,6 +212,28 @@ class KyuubiOperationPerUserSuite } } + test("max result rows") { + Seq("true", "false").foreach { incremental => + Seq("thrift", "arrow").foreach { resultFormat => + Seq("0", "1").foreach { maxResultRows => + withSessionConf()(Map.empty)(Map( + KyuubiConf.OPERATION_RESULT_FORMAT.key -> resultFormat, + KyuubiConf.OPERATION_RESULT_MAX_ROWS.key -> maxResultRows, + KyuubiConf.OPERATION_INCREMENTAL_COLLECT.key -> incremental)) { + withJdbcStatement("va") { statement => + statement.executeQuery("create temporary view va as select * from values(1),(2)") + val resultLimit = statement.executeQuery("select * from va") + assert(resultLimit.next()) + // always ignore max result rows on incremental collect mode + if (incremental == "true" || maxResultRows == "0") assert(resultLimit.next()) + assert(!resultLimit.next()) + } + } + } + } + } + } + test("scala NPE issue with hdfs jar") { val jarDir = Utils.createTempDir().toFile val udfCode = @@ -331,4 +339,50 @@ class KyuubiOperationPerUserSuite assert(!result.next()) } } + + test("accumulate the operation terminal state") { + val opType = classOf[ExecuteStatement].getSimpleName + val finishedMetric = s"${MetricsConstants.OPERATION_STATE}.$opType" + + s".${OperationState.FINISHED.toString.toLowerCase}" + val closedMetric = s"${MetricsConstants.OPERATION_STATE}.$opType" + + s".${OperationState.CLOSED.toString.toLowerCase}" + val finishedCount = MetricsSystem.meterValue(finishedMetric).getOrElse(0L) + val closedCount = MetricsSystem.meterValue(finishedMetric).getOrElse(0L) + withJdbcStatement() { statement => + statement.executeQuery("select engine_name()") + } + eventually(timeout(5.seconds), interval(100.milliseconds)) { + assert(MetricsSystem.meterValue(finishedMetric).getOrElse(0L) > finishedCount) + assert(MetricsSystem.meterValue(closedMetric).getOrElse(0L) > closedCount) + } + } + + test("trace ExecuteStatement exec time histogram") { + withJdbcStatement() { statement => + statement.executeQuery("select engine_name()") + } + val metric = + s"${MetricsConstants.OPERATION_EXEC_TIME}.${classOf[ExecuteStatement].getSimpleName}" + val snapshot = MetricsSystem.histogramSnapshot(metric).get + assert(snapshot.getMax > 0 && snapshot.getMedian > 0) + } + + test("align the server/engine session/executeStatement handle for Spark engine") { + withSessionConf(Map( + KyuubiConf.SESSION_ENGINE_LAUNCH_ASYNC.key -> "false"))(Map.empty)(Map.empty) { + withJdbcStatement() { _ => + val session = + server.backendService.sessionManager.allSessions().head.asInstanceOf[KyuubiSessionImpl] + eventually(timeout(10.seconds)) { + assert(session.handle === SessionHandle.apply(session.client.remoteSessionHandle)) + } + val opHandle = session.executeStatement("SELECT engine_id()", Map.empty, true, 0L) + eventually(timeout(10.seconds)) { + val operation = session.sessionManager.operationManager.getOperation( + opHandle).asInstanceOf[KyuubiOperation] + assert(opHandle == OperationHandle.apply(operation.remoteOpHandle())) + } + } + } + } } diff --git a/kyuubi-server/src/test/scala/org/apache/kyuubi/operation/KyuubiRestAuthenticationSuite.scala b/kyuubi-server/src/test/scala/org/apache/kyuubi/operation/KyuubiRestAuthenticationSuite.scala index 64707ce012e..61de8225171 100644 --- a/kyuubi-server/src/test/scala/org/apache/kyuubi/operation/KyuubiRestAuthenticationSuite.scala +++ b/kyuubi-server/src/test/scala/org/apache/kyuubi/operation/KyuubiRestAuthenticationSuite.scala @@ -25,7 +25,6 @@ import javax.ws.rs.core.MediaType import scala.collection.JavaConverters._ import org.apache.hadoop.security.UserGroupInformation -import org.apache.hive.service.rpc.thrift.TProtocolVersion import org.apache.kyuubi.RestClientTestHelper import org.apache.kyuubi.client.api.v1.dto.{SessionHandle, SessionOpenCount, SessionOpenRequest} @@ -129,11 +128,9 @@ class KyuubiRestAuthenticationSuite extends RestClientTestHelper { val proxyUser = "user1" UserGroupInformation.loginUserFromKeytab(testPrincipal, testKeytab) var token = generateToken(hostName) - val sessionOpenRequest = new SessionOpenRequest( - TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V11.getValue, - Map( - KyuubiConf.ENGINE_SHARE_LEVEL.key -> "CONNECTION", - "hive.server2.proxy.user" -> proxyUser).asJava) + val sessionOpenRequest = new SessionOpenRequest(Map( + KyuubiConf.ENGINE_SHARE_LEVEL.key -> "CONNECTION", + "hive.server2.proxy.user" -> proxyUser).asJava) var response = webTarget.path("api/v1/sessions") .request() diff --git a/kyuubi-server/src/test/scala/org/apache/kyuubi/operation/thrift/http/KyuubiOperationThriftHttpKerberosAndPlainAuthSuite.scala b/kyuubi-server/src/test/scala/org/apache/kyuubi/operation/thrift/http/KyuubiOperationThriftHttpKerberosAndPlainAuthSuite.scala index 4f6ae92f167..941e121a6cd 100644 --- a/kyuubi-server/src/test/scala/org/apache/kyuubi/operation/thrift/http/KyuubiOperationThriftHttpKerberosAndPlainAuthSuite.scala +++ b/kyuubi-server/src/test/scala/org/apache/kyuubi/operation/thrift/http/KyuubiOperationThriftHttpKerberosAndPlainAuthSuite.scala @@ -53,7 +53,7 @@ class KyuubiOperationThriftHttpKerberosAndPlainAuthSuite .set(KyuubiConf.SERVER_KEYTAB, testKeytab) .set(KyuubiConf.SERVER_PRINCIPAL, testPrincipal) .set(KyuubiConf.AUTHENTICATION_LDAP_URL, ldapUrl) - .set(KyuubiConf.AUTHENTICATION_LDAP_BASEDN, ldapBaseDn) + .set(KyuubiConf.AUTHENTICATION_LDAP_BASE_DN, ldapBaseDn.head) .set( KyuubiConf.AUTHENTICATION_CUSTOM_CLASS, classOf[UserDefineAuthenticationProviderImpl].getCanonicalName) diff --git a/kyuubi-server/src/test/scala/org/apache/kyuubi/parser/trino/KyuubiTrinoFeParserSuite.scala b/kyuubi-server/src/test/scala/org/apache/kyuubi/parser/trino/KyuubiTrinoFeParserSuite.scala index 3f5cf70b559..205a6a7be90 100644 --- a/kyuubi-server/src/test/scala/org/apache/kyuubi/parser/trino/KyuubiTrinoFeParserSuite.scala +++ b/kyuubi-server/src/test/scala/org/apache/kyuubi/parser/trino/KyuubiTrinoFeParserSuite.scala @@ -20,7 +20,7 @@ package org.apache.kyuubi.parser.trino import org.apache.kyuubi.KyuubiFunSuite import org.apache.kyuubi.sql.parser.trino.KyuubiTrinoFeParser import org.apache.kyuubi.sql.plan.{KyuubiTreeNode, PassThroughNode} -import org.apache.kyuubi.sql.plan.trino.{GetCatalogs, GetColumns, GetSchemas, GetTables, GetTableTypes, GetTypeInfo} +import org.apache.kyuubi.sql.plan.trino.{Deallocate, ExecuteForPreparing, GetCatalogs, GetColumns, GetPrimaryKeys, GetSchemas, GetTables, GetTableTypes, GetTypeInfo} class KyuubiTrinoFeParserSuite extends KyuubiFunSuite { val parser = new KyuubiTrinoFeParser() @@ -354,4 +354,37 @@ class KyuubiTrinoFeParserSuite extends KyuubiFunSuite { tableName = "%aa", colName = "%bb") } + + test("Support GetPrimaryKeys for Trino Fe") { + val kyuubiTreeNode = parse( + """ + | SELECT CAST(NULL AS varchar) TABLE_CAT, + | CAST(NULL AS varchar) TABLE_SCHEM, + | CAST(NULL AS varchar) TABLE_NAME, + | CAST(NULL AS varchar) COLUMN_NAME, + | CAST(NULL AS smallint) KEY_SEQ, + | CAST(NULL AS varchar) PK_NAME + | WHERE false + |""".stripMargin) + + assert(kyuubiTreeNode.isInstanceOf[GetPrimaryKeys]) + } + + test("Support PreparedStatement for Trino Fe (ExecuteForPreparing)") { + val kyuubiTreeNode = parse( + """ + | EXECUTE statement1 USING INTEGER '1' + |""".stripMargin) + + assert(kyuubiTreeNode.isInstanceOf[ExecuteForPreparing]) + } + + test("Support PreparedStatement for Trino Fe (Deallocate)") { + val kyuubiTreeNode = parse( + """ + | DEALLOCATE PREPARE statement1 + |""".stripMargin) + + assert(kyuubiTreeNode.isInstanceOf[Deallocate]) + } } diff --git a/kyuubi-server/src/test/scala/org/apache/kyuubi/server/BackendServiceMetricSuite.scala b/kyuubi-server/src/test/scala/org/apache/kyuubi/server/BackendServiceMetricSuite.scala index 53a53ef1dbe..a58d1842cff 100644 --- a/kyuubi-server/src/test/scala/org/apache/kyuubi/server/BackendServiceMetricSuite.scala +++ b/kyuubi-server/src/test/scala/org/apache/kyuubi/server/BackendServiceMetricSuite.scala @@ -78,7 +78,7 @@ class BackendServiceMetricSuite extends WithKyuubiServer with HiveJDBCTestHelper val meters2 = objMapper.readTree(Paths.get(reportPath.toString, "report.json").toFile).get("meters") - assert(meters2.get(MetricsConstants.BS_FETCH_RESULT_ROWS_RATE).get("count").asInt() == 7) + assert(meters2.get(MetricsConstants.BS_FETCH_RESULT_ROWS_RATE).get("count").asInt() == 8) assert(meters2.get(MetricsConstants.BS_FETCH_LOG_ROWS_RATE).get("count").asInt() >= logRows1) statement.executeQuery("DROP TABLE stu_test") diff --git a/kyuubi-server/src/test/scala/org/apache/kyuubi/server/KyuubiTBinaryFrontendServiceSuite.scala b/kyuubi-server/src/test/scala/org/apache/kyuubi/server/KyuubiTBinaryFrontendServiceSuite.scala index 69c10e7302f..5c54cbbb4b7 100644 --- a/kyuubi-server/src/test/scala/org/apache/kyuubi/server/KyuubiTBinaryFrontendServiceSuite.scala +++ b/kyuubi-server/src/test/scala/org/apache/kyuubi/server/KyuubiTBinaryFrontendServiceSuite.scala @@ -17,7 +17,9 @@ package org.apache.kyuubi.server -import org.apache.hive.service.rpc.thrift.TOpenSessionReq +import scala.collection.JavaConverters._ + +import org.apache.hive.service.rpc.thrift.{TOpenSessionReq, TSessionHandle} import org.scalatest.time.SpanSugar.convertIntToGrainOfTime import org.apache.kyuubi.{KyuubiFunSuite, Utils, WithKyuubiServer} @@ -79,4 +81,39 @@ class KyuubiTBinaryFrontendServiceSuite extends WithKyuubiServer with KyuubiFunS MetricsConstants.THRIFT_BINARY_CONN_OPEN).getOrElse(0L) - openConnections === 0) } } + + test("do not close session when disconnect") { + val sessionCount = server.backendService.sessionManager.allSessions().size + var handle: TSessionHandle = null + TClientTestUtils.withThriftClient(server.frontendServices.head) { + client => + val req = new TOpenSessionReq() + req.setUsername(Utils.currentUser) + req.setPassword("anonymous") + req.setConfiguration(Map("kyuubi.session.close.on.disconnect" -> "false").asJava) + val resp = client.OpenSession(req) + handle = resp.getSessionHandle + + assert(server.backendService.sessionManager.allSessions().size - sessionCount == 1) + } + Thread.sleep(3000L) + assert(server.backendService.sessionManager.allSessions().size - sessionCount == 1) + } + + test("close session when disconnect - default behavior") { + val sessionCount = server.backendService.sessionManager.allSessions().size + var handle: TSessionHandle = null + TClientTestUtils.withThriftClient(server.frontendServices.head) { + client => + val req = new TOpenSessionReq() + req.setUsername(Utils.currentUser) + req.setPassword("anonymous") + val resp = client.OpenSession(req) + handle = resp.getSessionHandle + + assert(server.backendService.sessionManager.allSessions().size - sessionCount == 1) + } + Thread.sleep(3000L) + assert(server.backendService.sessionManager.allSessions().size == sessionCount) + } } diff --git a/kyuubi-server/src/test/scala/org/apache/kyuubi/server/api/v1/AdminResourceSuite.scala b/kyuubi-server/src/test/scala/org/apache/kyuubi/server/api/v1/AdminResourceSuite.scala index bcbdad2cebe..a10994d7ea5 100644 --- a/kyuubi-server/src/test/scala/org/apache/kyuubi/server/api/v1/AdminResourceSuite.scala +++ b/kyuubi-server/src/test/scala/org/apache/kyuubi/server/api/v1/AdminResourceSuite.scala @@ -18,25 +18,34 @@ package org.apache.kyuubi.server.api.v1 import java.util.{Base64, UUID} +import javax.ws.rs.client.Entity import javax.ws.rs.core.{GenericType, MediaType} +import scala.collection.JavaConverters._ + +import org.apache.hive.service.rpc.thrift.TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V2 import org.scalatest.time.SpanSugar.convertIntToGrainOfTime import org.apache.kyuubi.{KYUUBI_VERSION, KyuubiFunSuite, RestFrontendTestHelper, Utils} -import org.apache.kyuubi.client.api.v1.dto.Engine +import org.apache.kyuubi.client.api.v1.dto.{Engine, OperationData, SessionData, SessionHandle, SessionOpenRequest} import org.apache.kyuubi.config.KyuubiConf +import org.apache.kyuubi.config.KyuubiReservedKeys.KYUUBI_SESSION_CONNECTION_URL_KEY import org.apache.kyuubi.engine.{ApplicationState, EngineRef, KyuubiApplicationManager} import org.apache.kyuubi.engine.EngineType.SPARK_SQL import org.apache.kyuubi.engine.ShareLevel.{CONNECTION, USER} import org.apache.kyuubi.ha.HighAvailabilityConf import org.apache.kyuubi.ha.client.DiscoveryClientProvider.withDiscoveryClient import org.apache.kyuubi.ha.client.DiscoveryPaths +import org.apache.kyuubi.plugin.PluginLoader import org.apache.kyuubi.server.http.authentication.AuthenticationHandler.AUTHORIZATION_HEADER class AdminResourceSuite extends KyuubiFunSuite with RestFrontendTestHelper { private val engineMgr = new KyuubiApplicationManager() + override protected lazy val conf: KyuubiConf = KyuubiConf() + .set(KyuubiConf.SERVER_ADMINISTRATORS, Seq("admin001")) + override def beforeAll(): Unit = { super.beforeAll() engineMgr.initialize(KyuubiConf()) @@ -64,6 +73,24 @@ class AdminResourceSuite extends KyuubiFunSuite with RestFrontendTestHelper { .header(AUTHORIZATION_HEADER, s"BASIC $encodeAuthorization") .post(null) assert(200 == response.getStatus) + + val admin001AuthHeader = new String( + Base64.getEncoder.encode("admin001".getBytes()), + "UTF-8") + response = webTarget.path("api/v1/admin/refresh/hadoop_conf") + .request() + .header(AUTHORIZATION_HEADER, s"BASIC $admin001AuthHeader") + .post(null) + assert(200 == response.getStatus) + + val admin002AuthHeader = new String( + Base64.getEncoder.encode("admin002".getBytes()), + "UTF-8") + response = webTarget.path("api/v1/admin/refresh/hadoop_conf") + .request() + .header(AUTHORIZATION_HEADER, s"BASIC $admin002AuthHeader") + .post(null) + assert(405 == response.getStatus) } test("refresh user defaults config of the kyuubi server") { @@ -84,6 +111,173 @@ class AdminResourceSuite extends KyuubiFunSuite with RestFrontendTestHelper { assert(200 == response.getStatus) } + test("refresh unlimited users of the kyuubi server") { + var response = webTarget.path("api/v1/admin/refresh/unlimited_users") + .request() + .post(null) + assert(405 == response.getStatus) + + val adminUser = Utils.currentUser + val encodeAuthorization = new String( + Base64.getEncoder.encode( + s"$adminUser:".getBytes()), + "UTF-8") + response = webTarget.path("api/v1/admin/refresh/unlimited_users") + .request() + .header(AUTHORIZATION_HEADER, s"BASIC $encodeAuthorization") + .post(null) + assert(200 == response.getStatus) + } + + test("list/close sessions") { + val requestObj = new SessionOpenRequest(Map("testConfig" -> "testValue").asJava) + + var response = webTarget.path("api/v1/sessions") + .request(MediaType.APPLICATION_JSON_TYPE) + .post(Entity.entity(requestObj, MediaType.APPLICATION_JSON_TYPE)) + + val adminUser = Utils.currentUser + val encodeAuthorization = new String( + Base64.getEncoder.encode( + s"$adminUser:".getBytes()), + "UTF-8") + + // get session list + var response2 = webTarget.path("api/v1/admin/sessions").request() + .header(AUTHORIZATION_HEADER, s"BASIC $encodeAuthorization") + .get() + assert(200 == response2.getStatus) + val sessions1 = response2.readEntity(new GenericType[Seq[SessionData]]() {}) + assert(sessions1.nonEmpty) + assert(sessions1.head.getConf.get(KYUUBI_SESSION_CONNECTION_URL_KEY) === fe.connectionUrl) + + // close an opened session + val sessionHandle = response.readEntity(classOf[SessionHandle]).getIdentifier + response = webTarget.path(s"api/v1/admin/sessions/$sessionHandle").request() + .header(AUTHORIZATION_HEADER, s"BASIC $encodeAuthorization") + .delete() + assert(200 == response.getStatus) + + // get session list again + response2 = webTarget.path("api/v1/admin/sessions").request() + .header(AUTHORIZATION_HEADER, s"BASIC $encodeAuthorization") + .get() + assert(200 == response2.getStatus) + val sessions2 = response2.readEntity(classOf[Seq[SessionData]]) + assert(sessions2.isEmpty) + } + + test("list sessions/operations with filter") { + fe.be.openSession( + HIVE_CLI_SERVICE_PROTOCOL_V2, + "admin", + "123456", + "localhost", + Map("testConfig" -> "testValue")) + + fe.be.openSession( + HIVE_CLI_SERVICE_PROTOCOL_V2, + "admin", + "123456", + "localhost", + Map("testConfig" -> "testValue")) + + fe.be.openSession( + HIVE_CLI_SERVICE_PROTOCOL_V2, + "test_user_1", + "xxxxxx", + "localhost", + Map("testConfig" -> "testValue")) + + val sessionHandle = fe.be.openSession( + HIVE_CLI_SERVICE_PROTOCOL_V2, + "test_user_2", + "xxxxxx", + "localhost", + Map("testConfig" -> "testValue")) + + val adminUser = Utils.currentUser + val encodeAuthorization = new String( + Base64.getEncoder.encode( + s"$adminUser:".getBytes()), + "UTF-8") + + // list sessions + var response = webTarget.path("api/v1/admin/sessions") + .queryParam("users", "admin") + .request() + .header(AUTHORIZATION_HEADER, s"BASIC $encodeAuthorization") + .get() + var sessions = response.readEntity(classOf[Seq[SessionData]]) + assert(200 == response.getStatus) + assert(sessions.size == 2) + + response = webTarget.path("api/v1/admin/sessions") + .queryParam("users", "test_user_1,test_user_2") + .request() + .header(AUTHORIZATION_HEADER, s"BASIC $encodeAuthorization") + .get() + sessions = response.readEntity(classOf[Seq[SessionData]]) + assert(200 == response.getStatus) + assert(sessions.size == 2) + + // list operations + response = webTarget.path("api/v1/admin/operations") + .queryParam("users", "test_user_1,test_user_2") + .request() + .header(AUTHORIZATION_HEADER, s"BASIC $encodeAuthorization") + .get() + var operations = response.readEntity(classOf[Seq[OperationData]]) + assert(operations.size == 2) + + response = webTarget.path("api/v1/admin/operations") + .queryParam("sessionHandle", sessionHandle.identifier) + .request() + .header(AUTHORIZATION_HEADER, s"BASIC $encodeAuthorization") + .get() + operations = response.readEntity(classOf[Seq[OperationData]]) + assert(200 == response.getStatus) + assert(operations.size == 1) + } + + test("list/close operations") { + val sessionHandle = fe.be.openSession( + HIVE_CLI_SERVICE_PROTOCOL_V2, + "admin", + "123456", + "localhost", + Map("testConfig" -> "testValue")) + val operation = fe.be.getCatalogs(sessionHandle) + + val adminUser = Utils.currentUser + val encodeAuthorization = new String( + Base64.getEncoder.encode( + s"$adminUser:".getBytes()), + "UTF-8") + + // list operations + var response = webTarget.path("api/v1/admin/operations").request() + .header(AUTHORIZATION_HEADER, s"BASIC $encodeAuthorization") + .get() + assert(200 == response.getStatus) + var operations = response.readEntity(new GenericType[Seq[OperationData]]() {}) + assert(operations.nonEmpty) + assert(operations.map(op => op.getIdentifier).contains(operation.identifier.toString)) + + // close operation + response = webTarget.path(s"api/v1/admin/operations/${operation.identifier}").request() + .header(AUTHORIZATION_HEADER, s"BASIC $encodeAuthorization") + .delete() + assert(200 == response.getStatus) + + // list again + response = webTarget.path("api/v1/admin/operations").request() + .header(AUTHORIZATION_HEADER, s"BASIC $encodeAuthorization") + .get() + operations = response.readEntity(new GenericType[Seq[OperationData]]() {}) + assert(!operations.map(op => op.getIdentifier).contains(operation.identifier.toString)) + } + test("delete engine - user share level") { val id = UUID.randomUUID().toString conf.set(KyuubiConf.ENGINE_SHARE_LEVEL, USER.toString) @@ -91,7 +285,10 @@ class AdminResourceSuite extends KyuubiFunSuite with RestFrontendTestHelper { conf.set(KyuubiConf.FRONTEND_THRIFT_BINARY_BIND_PORT, 0) conf.set(HighAvailabilityConf.HA_NAMESPACE, "kyuubi_test") conf.set(KyuubiConf.ENGINE_IDLE_TIMEOUT, 180000L) - val engine = new EngineRef(conf.clone, Utils.currentUser, "grp", id, null) + conf.set(KyuubiConf.GROUP_PROVIDER, "hadoop") + + val engine = + new EngineRef(conf.clone, Utils.currentUser, PluginLoader.loadGroupProvider(conf), id, null) val engineSpace = DiscoveryPaths.makePath( s"kyuubi_test_${KYUUBI_VERSION}_USER_SPARK_SQL", @@ -136,9 +333,11 @@ class AdminResourceSuite extends KyuubiFunSuite with RestFrontendTestHelper { conf.set(KyuubiConf.FRONTEND_THRIFT_BINARY_BIND_PORT, 0) conf.set(HighAvailabilityConf.HA_NAMESPACE, "kyuubi_test") conf.set(KyuubiConf.ENGINE_IDLE_TIMEOUT, 180000L) + conf.set(KyuubiConf.GROUP_PROVIDER, "hadoop") val id = UUID.randomUUID().toString - val engine = new EngineRef(conf.clone, Utils.currentUser, "grp", id, null) + val engine = + new EngineRef(conf.clone, Utils.currentUser, PluginLoader.loadGroupProvider(conf), id, null) val engineSpace = DiscoveryPaths.makePath( s"kyuubi_test_${KYUUBI_VERSION}_CONNECTION_SPARK_SQL", Utils.currentUser, @@ -174,7 +373,10 @@ class AdminResourceSuite extends KyuubiFunSuite with RestFrontendTestHelper { conf.set(KyuubiConf.FRONTEND_THRIFT_BINARY_BIND_PORT, 0) conf.set(HighAvailabilityConf.HA_NAMESPACE, "kyuubi_test") conf.set(KyuubiConf.ENGINE_IDLE_TIMEOUT, 180000L) - val engine = new EngineRef(conf.clone, Utils.currentUser, id, "grp", null) + conf.set(KyuubiConf.GROUP_PROVIDER, "hadoop") + + val engine = + new EngineRef(conf.clone, Utils.currentUser, PluginLoader.loadGroupProvider(conf), id, null) val engineSpace = DiscoveryPaths.makePath( s"kyuubi_test_${KYUUBI_VERSION}_USER_SPARK_SQL", @@ -219,6 +421,7 @@ class AdminResourceSuite extends KyuubiFunSuite with RestFrontendTestHelper { conf.set(KyuubiConf.FRONTEND_THRIFT_BINARY_BIND_PORT, 0) conf.set(HighAvailabilityConf.HA_NAMESPACE, "kyuubi_test") conf.set(KyuubiConf.ENGINE_IDLE_TIMEOUT, 180000L) + conf.set(KyuubiConf.GROUP_PROVIDER, "hadoop") val engineSpace = DiscoveryPaths.makePath( s"kyuubi_test_${KYUUBI_VERSION}_CONNECTION_SPARK_SQL", @@ -226,14 +429,16 @@ class AdminResourceSuite extends KyuubiFunSuite with RestFrontendTestHelper { "") val id1 = UUID.randomUUID().toString - val engine1 = new EngineRef(conf.clone, Utils.currentUser, "grp", id1, null) + val engine1 = + new EngineRef(conf.clone, Utils.currentUser, PluginLoader.loadGroupProvider(conf), id1, null) val engineSpace1 = DiscoveryPaths.makePath( s"kyuubi_test_${KYUUBI_VERSION}_CONNECTION_SPARK_SQL", Utils.currentUser, id1) val id2 = UUID.randomUUID().toString - val engine2 = new EngineRef(conf.clone, Utils.currentUser, "grp", id2, null) + val engine2 = + new EngineRef(conf.clone, Utils.currentUser, PluginLoader.loadGroupProvider(conf), id2, null) val engineSpace2 = DiscoveryPaths.makePath( s"kyuubi_test_${KYUUBI_VERSION}_CONNECTION_SPARK_SQL", Utils.currentUser, @@ -283,5 +488,4 @@ class AdminResourceSuite extends KyuubiFunSuite with RestFrontendTestHelper { } } } - } diff --git a/kyuubi-server/src/test/scala/org/apache/kyuubi/server/api/v1/BatchesResourceSuite.scala b/kyuubi-server/src/test/scala/org/apache/kyuubi/server/api/v1/BatchesResourceSuite.scala index 83c60878a73..055496ff322 100644 --- a/kyuubi-server/src/test/scala/org/apache/kyuubi/server/api/v1/BatchesResourceSuite.scala +++ b/kyuubi-server/src/test/scala/org/apache/kyuubi/server/api/v1/BatchesResourceSuite.scala @@ -28,12 +28,16 @@ import scala.collection.mutable.ArrayBuffer import scala.concurrent.duration.DurationInt import org.apache.hive.service.rpc.thrift.TProtocolVersion +import org.glassfish.jersey.media.multipart.FormDataMultiPart +import org.glassfish.jersey.media.multipart.file.FileDataBodyPart import org.apache.kyuubi.{BatchTestHelper, KyuubiFunSuite, RestFrontendTestHelper} import org.apache.kyuubi.client.api.v1.dto._ +import org.apache.kyuubi.client.util.BatchUtils +import org.apache.kyuubi.client.util.BatchUtils._ import org.apache.kyuubi.config.KyuubiConf import org.apache.kyuubi.config.KyuubiConf._ -import org.apache.kyuubi.engine.ApplicationInfo +import org.apache.kyuubi.engine.{ApplicationInfo, KyuubiApplicationManager} import org.apache.kyuubi.engine.spark.SparkBatchProcessBuilder import org.apache.kyuubi.metrics.{MetricsConstants, MetricsSystem} import org.apache.kyuubi.operation.{BatchJobSubmission, OperationState} @@ -41,15 +45,14 @@ import org.apache.kyuubi.operation.OperationState.OperationState import org.apache.kyuubi.server.KyuubiRestFrontendService import org.apache.kyuubi.server.http.authentication.AuthenticationHandler.AUTHORIZATION_HEADER import org.apache.kyuubi.server.metadata.api.Metadata -import org.apache.kyuubi.service.authentication.{KyuubiAuthenticationFactory, UserDefinedEngineSecuritySecretProvider} +import org.apache.kyuubi.service.authentication.KyuubiAuthenticationFactory import org.apache.kyuubi.session.{KyuubiBatchSessionImpl, KyuubiSessionManager, SessionHandle, SessionType} class BatchesResourceSuite extends KyuubiFunSuite with RestFrontendTestHelper with BatchTestHelper { override protected lazy val conf: KyuubiConf = KyuubiConf() .set(KyuubiConf.ENGINE_SECURITY_ENABLED, true) - .set( - KyuubiConf.ENGINE_SECURITY_SECRET_PROVIDER, - classOf[UserDefinedEngineSecuritySecretProvider].getName) + .set(KyuubiConf.ENGINE_SECURITY_SECRET_PROVIDER, "simple") + .set(KyuubiConf.SIMPLE_SECURITY_SECRET_PROVIDER_PROVIDER_SECRET, "ENGINE____SECRET") .set( KyuubiConf.SESSION_LOCAL_DIR_ALLOW_LIST, Seq(Paths.get(sparkBatchTestResource.get).getParent.toString)) @@ -199,6 +202,56 @@ class BatchesResourceSuite extends KyuubiFunSuite with RestFrontendTestHelper wi assert(!deleteBatchResponse.readEntity(classOf[CloseBatchResponse]).isSuccess) } + test("open batch session with uploading resource") { + val requestObj = newSparkBatchRequest(Map("spark.master" -> "local")) + val exampleJarFile = Paths.get(sparkBatchTestResource.get).toFile + val multipart = new FormDataMultiPart() + .field("batchRequest", requestObj, MediaType.APPLICATION_JSON_TYPE) + .bodyPart(new FileDataBodyPart("resourceFile", exampleJarFile)) + .asInstanceOf[FormDataMultiPart] + + val response = webTarget.path("api/v1/batches") + .request(MediaType.APPLICATION_JSON) + .post(Entity.entity(multipart, MediaType.MULTIPART_FORM_DATA)) + assert(200 == response.getStatus) + val batch = response.readEntity(classOf[Batch]) + assert(batch.getKyuubiInstance === fe.connectionUrl) + assert(batch.getBatchType === "SPARK") + assert(batch.getName === sparkBatchTestAppName) + assert(batch.getCreateTime > 0) + assert(batch.getEndTime === 0) + + webTarget.path(s"api/v1/batches/${batch.getId()}").request( + MediaType.APPLICATION_JSON_TYPE).delete() + eventually(timeout(3.seconds)) { + assert(KyuubiApplicationManager.uploadWorkDir.toFile.listFiles().isEmpty) + } + } + + test("open batch session w/ batch id") { + val batchId = UUID.randomUUID().toString + val reqObj = newSparkBatchRequest(Map( + "spark.master" -> "local", + KYUUBI_BATCH_ID_KEY -> batchId)) + + val resp1 = webTarget.path("api/v1/batches") + .request(MediaType.APPLICATION_JSON_TYPE) + .post(Entity.entity(reqObj, MediaType.APPLICATION_JSON_TYPE)) + assert(200 == resp1.getStatus) + val batch1 = resp1.readEntity(classOf[Batch]) + assert(batch1.getId === batchId) + + val resp2 = webTarget.path("api/v1/batches") + .request(MediaType.APPLICATION_JSON_TYPE) + .post(Entity.entity(reqObj, MediaType.APPLICATION_JSON_TYPE)) + assert(200 == resp2.getStatus) + val batch2 = resp2.readEntity(classOf[Batch]) + assert(batch2.getId === batchId) + + assert(batch1.getCreateTime === batch2.getCreateTime) + assert(BatchUtils.isDuplicatedSubmission(batch2)) + } + test("get batch session list") { val sessionManager = server.frontendServices.head .be.sessionManager.asInstanceOf[KyuubiSessionManager] @@ -223,7 +276,7 @@ class BatchesResourceSuite extends KyuubiFunSuite with RestFrontendTestHelper wi "kyuubi", "kyuubi", InetAddress.getLocalHost.getCanonicalHostName, - Map.empty, + Map(KYUUBI_BATCH_ID_KEY -> UUID.randomUUID().toString), newBatchRequest( "spark", sparkBatchTestResource.get, @@ -245,7 +298,7 @@ class BatchesResourceSuite extends KyuubiFunSuite with RestFrontendTestHelper wi "kyuubi", "kyuubi", InetAddress.getLocalHost.getCanonicalHostName, - Map.empty, + Map(KYUUBI_BATCH_ID_KEY -> UUID.randomUUID().toString), newBatchRequest( "spark", sparkBatchTestResource.get, @@ -255,7 +308,7 @@ class BatchesResourceSuite extends KyuubiFunSuite with RestFrontendTestHelper wi "kyuubi", "kyuubi", InetAddress.getLocalHost.getCanonicalHostName, - Map.empty, + Map(KYUUBI_BATCH_ID_KEY -> UUID.randomUUID().toString), newBatchRequest( "spark", sparkBatchTestResource.get, @@ -645,7 +698,7 @@ class BatchesResourceSuite extends KyuubiFunSuite with RestFrontendTestHelper wi "kyuubi", "kyuubi", InetAddress.getLocalHost.getCanonicalHostName, - Map.empty, + Map(KYUUBI_BATCH_ID_KEY -> UUID.randomUUID().toString), newSparkBatchRequest(Map("spark.jars" -> "disAllowPath"))) } val sessionHandleRegex = "\\[[\\S]*\\]".r diff --git a/kyuubi-server/src/test/scala/org/apache/kyuubi/server/api/v1/OperationsResourceSuite.scala b/kyuubi-server/src/test/scala/org/apache/kyuubi/server/api/v1/OperationsResourceSuite.scala index 238203b0bad..51701b231a0 100644 --- a/kyuubi-server/src/test/scala/org/apache/kyuubi/server/api/v1/OperationsResourceSuite.scala +++ b/kyuubi-server/src/test/scala/org/apache/kyuubi/server/api/v1/OperationsResourceSuite.scala @@ -29,12 +29,16 @@ import org.scalatest.time.SpanSugar.convertIntToGrainOfTime import org.apache.kyuubi.{KyuubiFunSuite, RestFrontendTestHelper} import org.apache.kyuubi.client.api.v1.dto._ +import org.apache.kyuubi.config.KyuubiConf import org.apache.kyuubi.events.KyuubiOperationEvent import org.apache.kyuubi.operation.{ExecuteStatement, OperationState} import org.apache.kyuubi.operation.OperationState.{FINISHED, OperationState} class OperationsResourceSuite extends KyuubiFunSuite with RestFrontendTestHelper { + override protected lazy val conf: KyuubiConf = KyuubiConf() + .set(KyuubiConf.SERVER_LIMIT_CLIENT_FETCH_MAX_ROWS, 5000) + test("get an operation event") { val catalogsHandleStr = getOpHandleStr("") checkOpState(catalogsHandleStr, FINISHED) @@ -126,6 +130,40 @@ class OperationsResourceSuite extends KyuubiFunSuite with RestFrontendTestHelper assert(logRowSet.getRowCount == 1) } + test("test invalid max rows") { + val opHandleStr = getOpHandleStr("select \"test\", 1, 0.32d, true") + checkOpState(opHandleStr, FINISHED) + val response = webTarget.path( + s"api/v1/operations/$opHandleStr/rowset") + .queryParam("maxrows", "10000") + .request(MediaType.APPLICATION_JSON).get() + assert(400 == response.getStatus) + } + + test("test get result row set with null value") { + val opHandleStr = getOpHandleStr( + s""" + |select + |cast(null as string) as c1, + |cast(null as boolean) as c2, + |cast(null as byte) as c3, + |cast(null as double) as c4, + |cast(null as short) as c5, + |cast(null as int) as c6, + |cast(null as bigint) as c7 + |""".stripMargin) + checkOpState(opHandleStr, FINISHED) + val response = webTarget.path( + s"api/v1/operations/$opHandleStr/rowset") + .queryParam("maxrows", "2") + .queryParam("fetchorientation", "FETCH_NEXT") + .request(MediaType.APPLICATION_JSON).get() + assert(200 == response.getStatus) + val logRowSet = response.readEntity(classOf[ResultRowSet]) + assert(logRowSet.getRows.asScala.head.getFields.asScala.forall(_.getValue == null)) + assert(logRowSet.getRowCount == 1) + } + def getOpHandleStr(statement: String = "show tables"): String = { val sessionHandle = fe.be.openSession( HIVE_CLI_SERVICE_PROTOCOL_V2, diff --git a/kyuubi-server/src/test/scala/org/apache/kyuubi/server/api/v1/SessionsResourceSuite.scala b/kyuubi-server/src/test/scala/org/apache/kyuubi/server/api/v1/SessionsResourceSuite.scala index db5e1360bcf..07a711de6bc 100644 --- a/kyuubi-server/src/test/scala/org/apache/kyuubi/server/api/v1/SessionsResourceSuite.scala +++ b/kyuubi-server/src/test/scala/org/apache/kyuubi/server/api/v1/SessionsResourceSuite.scala @@ -19,7 +19,7 @@ package org.apache.kyuubi.server.api.v1 import java.nio.charset.StandardCharsets import java.util -import java.util.Base64 +import java.util.{Base64, Collections} import javax.ws.rs.client.Entity import javax.ws.rs.core.{GenericType, MediaType, Response} @@ -28,10 +28,11 @@ import scala.collection.JavaConverters._ import org.scalatest.time.SpanSugar.convertIntToGrainOfTime import org.apache.kyuubi.{KyuubiFunSuite, RestFrontendTestHelper} +import org.apache.kyuubi.client.api.v1.dto import org.apache.kyuubi.client.api.v1.dto._ import org.apache.kyuubi.config.KyuubiConf import org.apache.kyuubi.config.KyuubiReservedKeys.KYUUBI_SESSION_CONNECTION_URL_KEY -import org.apache.kyuubi.events.KyuubiSessionEvent +import org.apache.kyuubi.engine.ShareLevel import org.apache.kyuubi.metrics.{MetricsConstants, MetricsSystem} import org.apache.kyuubi.operation.OperationHandle import org.apache.kyuubi.server.http.authentication.AuthenticationHandler.AUTHORIZATION_HEADER @@ -47,9 +48,7 @@ class SessionsResourceSuite extends KyuubiFunSuite with RestFrontendTestHelper { } test("open/close and count session") { - val requestObj = new SessionOpenRequest( - 1, - Map("testConfig" -> "testValue").asJava) + val requestObj = new SessionOpenRequest(Map("testConfig" -> "testValue").asJava) var response = webTarget.path("api/v1/sessions") .request(MediaType.APPLICATION_JSON_TYPE) @@ -80,9 +79,7 @@ class SessionsResourceSuite extends KyuubiFunSuite with RestFrontendTestHelper { } test("getSessionList") { - val requestObj = new SessionOpenRequest( - 1, - Map("testConfig" -> "testValue").asJava) + val requestObj = new SessionOpenRequest(Map("testConfig" -> "testValue").asJava) var response = webTarget.path("api/v1/sessions") .request(MediaType.APPLICATION_JSON_TYPE) @@ -108,9 +105,7 @@ class SessionsResourceSuite extends KyuubiFunSuite with RestFrontendTestHelper { } test("get session event") { - val sessionOpenRequest = new SessionOpenRequest( - 1, - Map("testConfig" -> "testValue").asJava) + val sessionOpenRequest = new SessionOpenRequest(Map("testConfig" -> "testValue").asJava) val user = "kyuubi".getBytes() @@ -126,10 +121,10 @@ class SessionsResourceSuite extends KyuubiFunSuite with RestFrontendTestHelper { // get session event var response = webTarget.path(s"api/v1/sessions/$sessionHandle").request().get() assert(200 == sessionOpenResp.getStatus) - val sessions = response.readEntity(classOf[KyuubiSessionEvent]) - assert(sessions.conf("testConfig").equals("testValue")) - assert(sessions.sessionType.equals(SessionType.INTERACTIVE.toString)) - assert(sessions.user.equals("kyuubi")) + val sessions = response.readEntity(classOf[dto.KyuubiSessionEvent]) + assert(sessions.getConf.get("testConfig").equals("testValue")) + assert(sessions.getSessionType.equals(SessionType.INTERACTIVE.toString)) + assert(sessions.getUser.equals("kyuubi")) // close an opened session response = webTarget.path(s"api/v1/sessions/$sessionHandle").request().delete() @@ -146,9 +141,9 @@ class SessionsResourceSuite extends KyuubiFunSuite with RestFrontendTestHelper { val failedConnections = MetricsSystem.counterValue(MetricsConstants.REST_CONN_FAIL).getOrElse(0L) - val requestObj = new SessionOpenRequest( - 1, - Map("testConfig" -> "testValue", KyuubiConf.SERVER_INFO_PROVIDER.key -> "SERVER").asJava) + val requestObj = new SessionOpenRequest(Map( + "testConfig" -> "testValue", + KyuubiConf.SERVER_INFO_PROVIDER.key -> "SERVER").asJava) var response: Response = webTarget.path("api/v1/sessions") .request(MediaType.APPLICATION_JSON_TYPE) @@ -187,9 +182,7 @@ class SessionsResourceSuite extends KyuubiFunSuite with RestFrontendTestHelper { } test("submit operation and get operation handle") { - val requestObj = new SessionOpenRequest( - 1, - Map("testConfig" -> "testValue").asJava) + val requestObj = new SessionOpenRequest(Map("testConfig" -> "testValue").asJava) var response: Response = webTarget.path("api/v1/sessions") .request(MediaType.APPLICATION_JSON_TYPE) @@ -199,7 +192,7 @@ class SessionsResourceSuite extends KyuubiFunSuite with RestFrontendTestHelper { val pathPrefix = s"api/v1/sessions/$sessionHandle" - val statementReq = new StatementRequest("show tables", true, 3000) + var statementReq = new StatementRequest("show tables", true, 3000) response = webTarget .path(s"$pathPrefix/operations/statement").request(MediaType.APPLICATION_JSON_TYPE) .post(Entity.entity(statementReq, MediaType.APPLICATION_JSON_TYPE)) @@ -207,6 +200,18 @@ class SessionsResourceSuite extends KyuubiFunSuite with RestFrontendTestHelper { var operationHandle = response.readEntity(classOf[OperationHandle]) assert(operationHandle !== null) + statementReq = new StatementRequest( + "spark.sql(\"show tables\")", + true, + 3000, + Collections.singletonMap(KyuubiConf.OPERATION_LANGUAGE.key, "SCALA")) + response = webTarget + .path(s"$pathPrefix/operations/statement").request(MediaType.APPLICATION_JSON_TYPE) + .post(Entity.entity(statementReq, MediaType.APPLICATION_JSON_TYPE)) + assert(200 == response.getStatus) + operationHandle = response.readEntity(classOf[OperationHandle]) + assert(operationHandle !== null) + response = webTarget.path(s"$pathPrefix/operations/typeInfo").request() .post(Entity.entity(null, MediaType.APPLICATION_JSON_TYPE)) assert(200 == response.getStatus) @@ -277,4 +282,23 @@ class SessionsResourceSuite extends KyuubiFunSuite with RestFrontendTestHelper { .post(Entity.entity(getCrossReferenceReq, MediaType.APPLICATION_JSON_TYPE)) assert(404 == response.getStatus) } + + test("post session exception if failed to open engine session") { + val requestObj = new SessionOpenRequest(Map( + "spark.master" -> "invalid", + KyuubiConf.ENGINE_SHARE_LEVEL.key -> ShareLevel.CONNECTION.toString).asJava) + + var response = webTarget.path("api/v1/sessions") + .request(MediaType.APPLICATION_JSON_TYPE) + .post(Entity.entity(requestObj, MediaType.APPLICATION_JSON_TYPE)) + + val sessionHandle = response.readEntity(classOf[SessionHandle]).getIdentifier + + eventually(timeout(1.minutes), interval(200.milliseconds)) { + response = webTarget.path(s"api/v1/sessions/$sessionHandle").request().get() + // will meet json parse exception with response.readEntity(classOf[KyuubiSessionEvent]) + val sessionEvent = response.readEntity(classOf[String]) + assert(sessionEvent.contains("The last 10 line(s) of log are:")) + } + } } diff --git a/kyuubi-server/src/test/scala/org/apache/kyuubi/server/metadata/MetadataManagerSuite.scala b/kyuubi-server/src/test/scala/org/apache/kyuubi/server/metadata/MetadataManagerSuite.scala index d8a8af20274..75c935a3de2 100644 --- a/kyuubi-server/src/test/scala/org/apache/kyuubi/server/metadata/MetadataManagerSuite.scala +++ b/kyuubi-server/src/test/scala/org/apache/kyuubi/server/metadata/MetadataManagerSuite.scala @@ -25,103 +25,152 @@ import org.scalatest.time.SpanSugar.convertIntToGrainOfTime import org.apache.kyuubi.{KyuubiException, KyuubiFunSuite} import org.apache.kyuubi.config.KyuubiConf +import org.apache.kyuubi.config.KyuubiConf._ import org.apache.kyuubi.metrics.{MetricsConstants, MetricsSystem} +import org.apache.kyuubi.metrics.MetricsConstants._ import org.apache.kyuubi.server.metadata.api.Metadata import org.apache.kyuubi.session.SessionType class MetadataManagerSuite extends KyuubiFunSuite { - val metadataManager = new MetadataManager() - val metricsSystem = new MetricsSystem() - val conf = KyuubiConf().set(KyuubiConf.METADATA_REQUEST_RETRY_INTERVAL, 100L) - - override def beforeAll(): Unit = { - super.beforeAll() - metricsSystem.initialize(conf) - metricsSystem.start() - metadataManager.initialize(conf) - metadataManager.start() - } - override def afterAll(): Unit = { - metadataManager.getBatches(null, null, null, 0, 0, 0, Int.MaxValue).foreach { batch => - metadataManager.cleanupMetadataById(batch.getId) + test("fail fast on duplicated key") { + Seq("true", "false").foreach { enableAsyncRetry => + withMetadataManager(Map( + METADATA_REQUEST_ASYNC_RETRY_ENABLED.key -> enableAsyncRetry, + METADATA_REQUEST_RETRY_INTERVAL.key -> "100")) { metadataManager => + val metadata = newMetadata() + metadataManager.insertMetadata(metadata) + Seq(true, false).foreach { asyncRetryOnError => + intercept[KyuubiException] { + metadataManager.insertMetadata(metadata, asyncRetryOnError) + } + } + } } - metadataManager.stop() - metricsSystem.stop() - super.afterAll() } - override protected def afterEach(): Unit = { - eventually(timeout(5.seconds), interval(200.milliseconds)) { - assert(MetricsSystem.counterValue( - MetricsConstants.METADATA_REQUEST_OPENED).getOrElse(0L) === 0) + test("async retry the metadata store requests") { + withMetadataManager( + Map( + METADATA_REQUEST_ASYNC_RETRY_ENABLED.key -> "true", + METADATA_REQUEST_RETRY_INTERVAL.key -> "100"), + () => + new MetadataManager { + override protected def unrecoverableDBErr(cause: Throwable): Boolean = false + }) { metadataManager => + val metadata = newMetadata() + metadataManager.insertMetadata(metadata) + metadataManager.insertMetadata(metadata, asyncRetryOnError = true) + val retryRef = metadataManager.getMetadataRequestsRetryRef(metadata.identifier) + val metadataToUpdate = metadata.copy(state = "RUNNING") + retryRef.addRetryingMetadataRequest(UpdateMetadata(metadataToUpdate)) + eventually(timeout(3.seconds)) { + assert(retryRef.hasRemainingRequests()) + assert(metadataManager.getBatch(metadata.identifier).getState === "PENDING") + } + + val metadata2 = metadata.copy(identifier = UUID.randomUUID().toString) + val metadata2ToUpdate = metadata2.copy( + engineId = "app_id", + engineName = "app_name", + engineUrl = "app_url", + engineState = "app_state", + state = "RUNNING") + + metadataManager.addMetadataRetryRequest(InsertMetadata(metadata2)) + metadataManager.addMetadataRetryRequest(UpdateMetadata(metadata2ToUpdate)) + + val retryRef2 = metadataManager.getMetadataRequestsRetryRef(metadata2.identifier) + + eventually(timeout(3.seconds)) { + assert(!retryRef2.hasRemainingRequests()) + assert(metadataManager.getBatch(metadata2.identifier).getState === "RUNNING") + } + + metadataManager.identifierRequestsAsyncRetryRefs.clear() + eventually(timeout(3.seconds)) { + metadataManager.identifierRequestsAsyncRetryingCounts.asScala.forall(_._2.get() == 0) + } + metadataManager.identifierRequestsAsyncRetryingCounts.clear() } } - test("retry the metadata store requests") { - val metadata = Metadata( - identifier = UUID.randomUUID().toString, - sessionType = SessionType.BATCH, - realUser = "kyuubi", - username = "kyuubi", - ipAddress = "127.0.0.1", - kyuubiInstance = "localhost:10009", - state = "PENDING", - resource = "intern", - className = "org.apache.kyuubi.SparkWC", - requestName = "kyuubi_batch", - requestConf = Map("spark.master" -> "local"), - requestArgs = Seq("100"), - createTime = System.currentTimeMillis(), - engineType = "spark", - clusterManager = Some("local")) - metadataManager.insertMetadata(metadata) - intercept[KyuubiException] { - metadataManager.insertMetadata(metadata, retryOnError = false) - } - metadataManager.insertMetadata(metadata, retryOnError = true) - val retryRef = metadataManager.getMetadataRequestsRetryRef(metadata.identifier) - val metadataToUpdate = metadata.copy(state = "RUNNING") - retryRef.addRetryingMetadataRequest(UpdateMetadata(metadataToUpdate)) - eventually(timeout(3.seconds)) { - assert(retryRef.hasRemainingRequests()) - assert(metadataManager.getBatch(metadata.identifier).getState === "PENDING") - } - - val metadata2 = metadata.copy(identifier = UUID.randomUUID().toString) - val metadata2ToUpdate = metadata2.copy( - engineId = "app_id", - engineName = "app_name", - engineUrl = "app_url", - engineState = "app_state", - state = "RUNNING") - - metadataManager.addMetadataRetryRequest(InsertMetadata(metadata2)) - metadataManager.addMetadataRetryRequest(UpdateMetadata(metadata2ToUpdate)) - - val retryRef2 = metadataManager.getMetadataRequestsRetryRef(metadata2.identifier) - - eventually(timeout(3.seconds)) { - assert(!retryRef2.hasRemainingRequests()) - assert(metadataManager.getBatch(metadata2.identifier).getState === "RUNNING") + test("async metadata request metrics") { + withMetadataManager(Map( + METADATA_REQUEST_ASYNC_RETRY_ENABLED.key -> "true", + METADATA_REQUEST_RETRY_INTERVAL.key -> "100")) { metadataManager => + val totalRequests = MetricsSystem.meterValue(METADATA_REQUEST_TOTAL).getOrElse(0L) + val failedRequests = MetricsSystem.meterValue(METADATA_REQUEST_FAIL).getOrElse(0L) + val retryingRequests = MetricsSystem.meterValue(METADATA_REQUEST_RETRYING).getOrElse(0L) + + val metadata = newMetadata() + metadataManager.insertMetadata(metadata) + + assert( + MetricsSystem.meterValue(MetricsConstants.METADATA_REQUEST_TOTAL) + .getOrElse(0L) - totalRequests === 1) + assert( + MetricsSystem.meterValue(MetricsConstants.METADATA_REQUEST_FAIL) + .getOrElse(0L) - failedRequests === 0) + assert( + MetricsSystem.meterValue(MetricsConstants.METADATA_REQUEST_RETRYING) + .getOrElse(0L) - retryingRequests === 0) + + val invalidMetadata = metadata.copy(kyuubiInstance = null) + intercept[Exception](metadataManager.insertMetadata(invalidMetadata, false)) + assert( + MetricsSystem.meterValue(MetricsConstants.METADATA_REQUEST_TOTAL) + .getOrElse(0L) - totalRequests === 2) + assert( + MetricsSystem.meterValue(MetricsConstants.METADATA_REQUEST_FAIL) + .getOrElse(0L) - failedRequests === 1) + assert( + MetricsSystem.meterValue(MetricsConstants.METADATA_REQUEST_RETRYING) + .getOrElse(0L) - retryingRequests === 0) + + metadataManager.insertMetadata(invalidMetadata, true) + + assert( + MetricsSystem.meterValue(MetricsConstants.METADATA_REQUEST_TOTAL) + .getOrElse(0L) - totalRequests === 3) + assert( + MetricsSystem.meterValue(MetricsConstants.METADATA_REQUEST_FAIL) + .getOrElse(0L) - failedRequests === 2) + assert( + MetricsSystem.meterValue(MetricsConstants.METADATA_REQUEST_RETRYING) + .getOrElse(0L) - retryingRequests === 1) } + } - metadataManager.identifierRequestsRetryRefs.clear() - eventually(timeout(3.seconds)) { - metadataManager.identifierRequestsRetryingCounts.asScala.forall(_._2.get() == 0) + private def withMetadataManager( + confOverlay: Map[String, String], + newMetadataMgr: () => MetadataManager = () => new MetadataManager())( + f: MetadataManager => Unit): Unit = { + val metricsSystem = new MetricsSystem() + val metadataManager = newMetadataMgr() + val conf = KyuubiConf() + confOverlay.foreach { case (k, v) => conf.set(k, v) } + try { + metricsSystem.initialize(conf) + metricsSystem.start() + metadataManager.initialize(conf) + metadataManager.start() + f(metadataManager) + } finally { + metadataManager.getBatches(null, null, null, 0, 0, 0, Int.MaxValue).foreach { batch => + metadataManager.cleanupMetadataById(batch.getId) + } + // ensure no metadata request leak + eventually(timeout(5.seconds), interval(200.milliseconds)) { + assert(MetricsSystem.counterValue(METADATA_REQUEST_OPENED).getOrElse(0L) === 0) + } + metadataManager.stop() + metricsSystem.stop() } - metadataManager.identifierRequestsRetryingCounts.clear() } - test("metadata request metrics") { - val totalRequests = - MetricsSystem.meterValue(MetricsConstants.METADATA_REQUEST_TOTAL).getOrElse(0L) - val failedRequests = - MetricsSystem.meterValue(MetricsConstants.METADATA_REQUEST_FAIL).getOrElse(0L) - val retryingRequests = - MetricsSystem.meterValue(MetricsConstants.METADATA_REQUEST_RETRYING).getOrElse(0L) - - val metadata = Metadata( + private def newMetadata(): Metadata = { + Metadata( identifier = UUID.randomUUID().toString, sessionType = SessionType.BATCH, realUser = "kyuubi", @@ -137,37 +186,5 @@ class MetadataManagerSuite extends KyuubiFunSuite { createTime = System.currentTimeMillis(), engineType = "spark", clusterManager = Some("local")) - metadataManager.insertMetadata(metadata) - - assert( - MetricsSystem.meterValue(MetricsConstants.METADATA_REQUEST_TOTAL).getOrElse( - 0L) - totalRequests === 1) - assert( - MetricsSystem.meterValue(MetricsConstants.METADATA_REQUEST_FAIL).getOrElse( - 0L) - failedRequests === 0) - assert(MetricsSystem.meterValue( - MetricsConstants.METADATA_REQUEST_RETRYING).getOrElse(0L) - retryingRequests === 0) - - val invalidMetadata = metadata.copy(kyuubiInstance = null) - intercept[Exception](metadataManager.insertMetadata(invalidMetadata, false)) - assert( - MetricsSystem.meterValue(MetricsConstants.METADATA_REQUEST_TOTAL).getOrElse( - 0L) - totalRequests === 2) - assert( - MetricsSystem.meterValue(MetricsConstants.METADATA_REQUEST_FAIL).getOrElse( - 0L) - failedRequests === 1) - assert(MetricsSystem.meterValue( - MetricsConstants.METADATA_REQUEST_RETRYING).getOrElse(0L) - retryingRequests === 0) - - metadataManager.insertMetadata(invalidMetadata, true) - - assert( - MetricsSystem.meterValue(MetricsConstants.METADATA_REQUEST_TOTAL).getOrElse( - 0L) - totalRequests === 3) - assert( - MetricsSystem.meterValue(MetricsConstants.METADATA_REQUEST_FAIL).getOrElse( - 0L) - failedRequests === 2) - assert(MetricsSystem.meterValue( - MetricsConstants.METADATA_REQUEST_RETRYING).getOrElse(0L) - retryingRequests === 1) } } diff --git a/kyuubi-server/src/test/scala/org/apache/kyuubi/server/metadata/jdbc/JDBCMetadataStoreSuite.scala b/kyuubi-server/src/test/scala/org/apache/kyuubi/server/metadata/jdbc/JDBCMetadataStoreSuite.scala index 73dc105c3d6..aa53af3a908 100644 --- a/kyuubi-server/src/test/scala/org/apache/kyuubi/server/metadata/jdbc/JDBCMetadataStoreSuite.scala +++ b/kyuubi-server/src/test/scala/org/apache/kyuubi/server/metadata/jdbc/JDBCMetadataStoreSuite.scala @@ -279,4 +279,20 @@ class JDBCMetadataStoreSuite extends KyuubiFunSuite { jdbcMetadataStore.updateMetadata(metadata) } } + + test("get schema urls with correct version ordering") { + val url1 = "metadata-store-schema-1.7.0.mysql.sql" + val url2 = "metadata-store-schema-1.7.1.mysql.sql" + val url3 = "metadata-store-schema-1.8.0.mysql.sql" + val url4 = "metadata-store-schema-1.10.0.mysql.sql" + val url5 = "metadata-store-schema-2.1.0.mysql.sql" + assert(jdbcMetadataStore.getSchemaVersion(url1) === ((1, 7, 0))) + assert(jdbcMetadataStore.getSchemaVersion(url2) === ((1, 7, 1))) + assert(jdbcMetadataStore.getSchemaVersion(url3) === ((1, 8, 0))) + assert(jdbcMetadataStore.getSchemaVersion(url4) === ((1, 10, 0))) + assert(jdbcMetadataStore.getSchemaVersion(url5) === ((2, 1, 0))) + assert(jdbcMetadataStore.getLatestSchemaUrl(Seq(url1, url2, url3, url4)).get === url4) + assert(jdbcMetadataStore.getLatestSchemaUrl(Seq(url1, url3, url4, url2)).get === url4) + assert(jdbcMetadataStore.getLatestSchemaUrl(Seq(url1, url2, url3, url4, url5)).get === url5) + } } diff --git a/kyuubi-server/src/test/scala/org/apache/kyuubi/server/mysql/MySQLJDBCTestHelper.scala b/kyuubi-server/src/test/scala/org/apache/kyuubi/server/mysql/MySQLJDBCTestHelper.scala index c258b6e6924..e6df1fb20b9 100644 --- a/kyuubi-server/src/test/scala/org/apache/kyuubi/server/mysql/MySQLJDBCTestHelper.scala +++ b/kyuubi-server/src/test/scala/org/apache/kyuubi/server/mysql/MySQLJDBCTestHelper.scala @@ -22,7 +22,7 @@ import org.apache.kyuubi.operation.JDBCTestHelper trait MySQLJDBCTestHelper extends JDBCTestHelper { - override def jdbcDriverClass: String = "com.mysql.jdbc.Driver" + override def jdbcDriverClass: String = "com.mysql.cj.jdbc.Driver" protected lazy val user: String = Utils.currentUser @@ -42,7 +42,7 @@ trait MySQLJDBCTestHelper extends JDBCTestHelper { if (jdbcConfigs.isEmpty) { "" } else { - "?" + jdbcConfigs.map(kv => kv._1 + "=" + kv._2).mkString(";") + "?" + jdbcConfigs.map(kv => kv._1 + "=" + kv._2).mkString("&") } jdbcUrl + jdbcConfStr } diff --git a/kyuubi-server/src/test/scala/org/apache/kyuubi/server/rest/client/AdminCtlSuite.scala b/kyuubi-server/src/test/scala/org/apache/kyuubi/server/rest/client/AdminCtlSuite.scala index f7cbb20016c..389b67e4738 100644 --- a/kyuubi-server/src/test/scala/org/apache/kyuubi/server/rest/client/AdminCtlSuite.scala +++ b/kyuubi-server/src/test/scala/org/apache/kyuubi/server/rest/client/AdminCtlSuite.scala @@ -26,6 +26,7 @@ import org.apache.kyuubi.engine.EngineRef import org.apache.kyuubi.ha.HighAvailabilityConf import org.apache.kyuubi.ha.client.DiscoveryClientProvider.withDiscoveryClient import org.apache.kyuubi.ha.client.DiscoveryPaths +import org.apache.kyuubi.plugin.PluginLoader class AdminCtlSuite extends RestClientTestHelper with TestPrematureExit { override def beforeAll(): Unit = { @@ -53,8 +54,10 @@ class AdminCtlSuite extends RestClientTestHelper with TestPrematureExit { conf.set(HighAvailabilityConf.HA_NAMESPACE, "kyuubi_test") conf.set(KyuubiConf.ENGINE_IDLE_TIMEOUT, 180000L) conf.set(KyuubiConf.AUTHENTICATION_METHOD, Seq("LDAP", "CUSTOM")) + conf.set(KyuubiConf.GROUP_PROVIDER, "hadoop") + val user = ldapUser - val engine = new EngineRef(conf.clone, user, "grp", id, null) + val engine = new EngineRef(conf.clone, user, PluginLoader.loadGroupProvider(conf), id, null) val engineSpace = DiscoveryPaths.makePath( s"kyuubi_test_${KYUUBI_VERSION}_USER_SPARK_SQL", diff --git a/kyuubi-server/src/test/scala/org/apache/kyuubi/server/rest/client/AdminRestApiSuite.scala b/kyuubi-server/src/test/scala/org/apache/kyuubi/server/rest/client/AdminRestApiSuite.scala index ab1a102026c..b79e62a12f4 100644 --- a/kyuubi-server/src/test/scala/org/apache/kyuubi/server/rest/client/AdminRestApiSuite.scala +++ b/kyuubi-server/src/test/scala/org/apache/kyuubi/server/rest/client/AdminRestApiSuite.scala @@ -21,6 +21,8 @@ import java.util.UUID import scala.collection.JavaConverters.asScalaBufferConverter +import org.apache.hive.service.rpc.thrift.TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V2 + import org.apache.kyuubi.{KYUUBI_VERSION, RestClientTestHelper} import org.apache.kyuubi.client.{AdminRestApi, KyuubiRestClient} import org.apache.kyuubi.config.{KyuubiConf, KyuubiReservedKeys} @@ -28,6 +30,7 @@ import org.apache.kyuubi.engine.EngineRef import org.apache.kyuubi.ha.HighAvailabilityConf import org.apache.kyuubi.ha.client.DiscoveryClientProvider.withDiscoveryClient import org.apache.kyuubi.ha.client.DiscoveryPaths +import org.apache.kyuubi.plugin.PluginLoader class AdminRestApiSuite extends RestClientTestHelper { test("refresh kyuubi server hadoop conf") { @@ -46,8 +49,9 @@ class AdminRestApiSuite extends RestClientTestHelper { conf.set(HighAvailabilityConf.HA_NAMESPACE, "kyuubi_test") conf.set(KyuubiConf.ENGINE_IDLE_TIMEOUT, 180000L) conf.set(KyuubiConf.AUTHENTICATION_METHOD, Seq("LDAP", "CUSTOM")) + conf.set(KyuubiConf.GROUP_PROVIDER, "hadoop") val user = ldapUser - val engine = new EngineRef(conf.clone, user, "grp", id, null) + val engine = new EngineRef(conf.clone, user, PluginLoader.loadGroupProvider(conf), id, null) val engineSpace = DiscoveryPaths.makePath( s"kyuubi_test_${KYUUBI_VERSION}_USER_SPARK_SQL", @@ -84,4 +88,64 @@ class AdminRestApiSuite extends RestClientTestHelper { engines = adminRestApi.listEngines("spark_sql", "user", "default", "").asScala assert(engines.size == 0) } + + test("list/close session") { + fe.be.sessionManager.openSession( + HIVE_CLI_SERVICE_PROTOCOL_V2, + "admin", + "123456", + "localhost", + Map("testConfig" -> "testValue")) + + val spnegoKyuubiRestClient: KyuubiRestClient = + KyuubiRestClient.builder(baseUri.toString) + .authHeaderMethod(KyuubiRestClient.AuthHeaderMethod.SPNEGO) + .spnegoHost("localhost") + .build() + val adminRestApi = new AdminRestApi(spnegoKyuubiRestClient) + + // list sessions + var sessions = adminRestApi.listSessions().asScala + assert(sessions.nonEmpty) + assert(sessions.head.getUser == "admin") + + // close session + val response = adminRestApi.closeSession(sessions.head.getIdentifier) + assert(response.contains("success")) + + // list again + sessions = adminRestApi.listSessions().asScala + assert(sessions.isEmpty) + } + + test("list/close operation") { + val sessionHandle = fe.be.openSession( + HIVE_CLI_SERVICE_PROTOCOL_V2, + "admin", + "123456", + "localhost", + Map("testConfig" -> "testValue")) + val operation = fe.be.getCatalogs(sessionHandle) + + val spnegoKyuubiRestClient: KyuubiRestClient = + KyuubiRestClient.builder(baseUri.toString) + .authHeaderMethod(KyuubiRestClient.AuthHeaderMethod.SPNEGO) + .spnegoHost("localhost") + .build() + val adminRestApi = new AdminRestApi(spnegoKyuubiRestClient) + + // list operations + var operations = adminRestApi.listOperations().asScala + assert(operations.nonEmpty) + assert(operations.map(op => op.getIdentifier).contains(operation.identifier.toString)) + + // close operation + val response = adminRestApi.closeOperation(operation.identifier.toString) + assert(response.contains("success")) + + // list again + operations = adminRestApi.listOperations().asScala + assert(!operations.map(op => op.getIdentifier).contains(operation.identifier.toString)) + + } } diff --git a/kyuubi-server/src/test/scala/org/apache/kyuubi/server/rest/client/BatchCliSuite.scala b/kyuubi-server/src/test/scala/org/apache/kyuubi/server/rest/client/BatchCliSuite.scala index 9d0a9b15a4a..ff807ef027b 100644 --- a/kyuubi-server/src/test/scala/org/apache/kyuubi/server/rest/client/BatchCliSuite.scala +++ b/kyuubi-server/src/test/scala/org/apache/kyuubi/server/rest/client/BatchCliSuite.scala @@ -21,6 +21,7 @@ import java.io.File import java.net.InetAddress import java.nio.charset.StandardCharsets import java.nio.file.{Files, Paths} +import java.util.UUID import org.apache.hadoop.security.UserGroupInformation import org.apache.hadoop.shaded.com.nimbusds.jose.util.StandardCharset @@ -28,6 +29,7 @@ import org.apache.hive.service.rpc.thrift.TProtocolVersion import org.scalatest.time.SpanSugar.convertIntToGrainOfTime import org.apache.kyuubi.{BatchTestHelper, RestClientTestHelper, Utils} +import org.apache.kyuubi.client.util.BatchUtils._ import org.apache.kyuubi.config.KyuubiConf import org.apache.kyuubi.ctl.{CtlConf, TestPrematureExit} import org.apache.kyuubi.metrics.{MetricsConstants, MetricsSystem} @@ -256,7 +258,7 @@ class BatchCliSuite extends RestClientTestHelper with TestPrematureExit with Bat "kyuubi", "kyuubi", InetAddress.getLocalHost.getCanonicalHostName, - Map.empty, + Map(KYUUBI_BATCH_ID_KEY -> UUID.randomUUID().toString), newBatchRequest( "spark", "", @@ -278,7 +280,7 @@ class BatchCliSuite extends RestClientTestHelper with TestPrematureExit with Bat "kyuubi", "kyuubi", InetAddress.getLocalHost.getCanonicalHostName, - Map.empty, + Map(KYUUBI_BATCH_ID_KEY -> UUID.randomUUID().toString), newBatchRequest( "spark", "", @@ -288,7 +290,7 @@ class BatchCliSuite extends RestClientTestHelper with TestPrematureExit with Bat "kyuubi", "kyuubi", InetAddress.getLocalHost.getCanonicalHostName, - Map.empty, + Map(KYUUBI_BATCH_ID_KEY -> UUID.randomUUID().toString), newBatchRequest( "spark", "", diff --git a/kyuubi-server/src/test/scala/org/apache/kyuubi/server/rest/client/BatchRestApiSuite.scala b/kyuubi-server/src/test/scala/org/apache/kyuubi/server/rest/client/BatchRestApiSuite.scala index b425f62d65f..cb7905286f9 100644 --- a/kyuubi-server/src/test/scala/org/apache/kyuubi/server/rest/client/BatchRestApiSuite.scala +++ b/kyuubi-server/src/test/scala/org/apache/kyuubi/server/rest/client/BatchRestApiSuite.scala @@ -17,14 +17,16 @@ package org.apache.kyuubi.server.rest.client +import java.nio.file.Paths import java.util.Base64 import org.scalatest.time.SpanSugar.convertIntToGrainOfTime -import org.apache.kyuubi.{BatchTestHelper, RestClientTestHelper} +import org.apache.kyuubi.{BatchTestHelper, KYUUBI_VERSION, RestClientTestHelper} import org.apache.kyuubi.client.{BatchRestApi, KyuubiRestClient} import org.apache.kyuubi.client.api.v1.dto.Batch import org.apache.kyuubi.client.exception.KyuubiRestException +import org.apache.kyuubi.config.KyuubiReservedKeys import org.apache.kyuubi.metrics.{MetricsConstants, MetricsSystem} import org.apache.kyuubi.session.{KyuubiSession, SessionHandle} @@ -83,6 +85,25 @@ class BatchRestApiSuite extends RestClientTestHelper with BatchTestHelper { basicKyuubiRestClient.close() } + test("basic batch rest client with uploading resource file") { + val basicKyuubiRestClient: KyuubiRestClient = + KyuubiRestClient.builder(baseUri.toString) + .authHeaderMethod(KyuubiRestClient.AuthHeaderMethod.BASIC) + .username(ldapUser) + .password(ldapUserPasswd) + .socketTimeout(30000) + .build() + val batchRestApi: BatchRestApi = new BatchRestApi(basicKyuubiRestClient) + + val requestObj = newSparkBatchRequest(Map("spark.master" -> "local")) + val exampleJarFile = Paths.get(sparkBatchTestResource.get).toFile + val batch: Batch = batchRestApi.createBatch(requestObj, exampleJarFile) + + assert(batch.getKyuubiInstance === fe.connectionUrl) + assert(batch.getBatchType === "SPARK") + basicKyuubiRestClient.close() + } + test("basic batch rest client with invalid user") { val totalConnections = MetricsSystem.counterValue(MetricsConstants.REST_CONN_TOTAL).getOrElse(0L) @@ -195,4 +216,22 @@ class BatchRestApiSuite extends RestClientTestHelper with BatchTestHelper { batchRestApi.listBatches(null, null, null, 0, 0, 0, 1) batchRestApi.listBatches(null, null, null, 0, 0, 0, 1) } + + test("support to transfer client version when creating batch") { + val spnegoKyuubiRestClient: KyuubiRestClient = + KyuubiRestClient.builder(baseUri.toString) + .authHeaderMethod(KyuubiRestClient.AuthHeaderMethod.SPNEGO) + .spnegoHost("localhost") + .build() + val batchRestApi: BatchRestApi = new BatchRestApi(spnegoKyuubiRestClient) + // create batch + val requestObj = + newSparkBatchRequest(Map("spark.master" -> "local")) + + val batch = batchRestApi.createBatch(requestObj) + val batchSession = + server.backendService.sessionManager.getSession(SessionHandle.fromUUID(batch.getId)) + assert( + batchSession.conf.get(KyuubiReservedKeys.KYUUBI_CLIENT_VERSION_KEY) == Some(KYUUBI_VERSION)) + } } diff --git a/kyuubi-server/src/test/scala/org/apache/kyuubi/server/rest/client/SessionRestApiSuite.scala b/kyuubi-server/src/test/scala/org/apache/kyuubi/server/rest/client/SessionRestApiSuite.scala index 1edfb5e5393..ed116d077cc 100644 --- a/kyuubi-server/src/test/scala/org/apache/kyuubi/server/rest/client/SessionRestApiSuite.scala +++ b/kyuubi-server/src/test/scala/org/apache/kyuubi/server/rest/client/SessionRestApiSuite.scala @@ -17,21 +17,153 @@ package org.apache.kyuubi.server.rest.client -import scala.collection.JavaConverters.asScalaBufferConverter +import java.util -import org.apache.hive.service.rpc.thrift.TProtocolVersion +import scala.collection.JavaConverters._ + +import org.apache.hive.service.rpc.thrift.TGetInfoType import org.apache.kyuubi.RestClientTestHelper import org.apache.kyuubi.client.{KyuubiRestClient, SessionRestApi} +import org.apache.kyuubi.client.api.v1.dto +import org.apache.kyuubi.client.api.v1.dto._ +import org.apache.kyuubi.client.exception.KyuubiRestException +import org.apache.kyuubi.config.KyuubiConf +import org.apache.kyuubi.session.SessionType class SessionRestApiSuite extends RestClientTestHelper { - test("list session") { - fe.be.sessionManager.openSession( - TProtocolVersion.findByValue(1), - "admin", - "123456", - "localhost", - Map("testConfig" -> "testValue")) + test("get/close/list/count session") { + withSessionRestApi { sessionRestApi => + { + // open session + val sessionOpenRequest = new SessionOpenRequest(Map("testConfig" -> "testValue").asJava) + sessionRestApi.openSession(sessionOpenRequest) + + // list sessions + var sessions = sessionRestApi.listSessions().asScala + assert(sessions.size == 1) + val sessionHandle = sessions(0).getIdentifier + + // get open session count + var sessionCount = sessionRestApi.getOpenSessionCount + assert(sessionCount == 1) + + // close session + sessionRestApi.closeSession(sessionHandle) + + // list sessions again + sessions = sessionRestApi.listSessions().asScala + assert(sessions.isEmpty) + + // get open session count again + sessionCount = sessionRestApi.getOpenSessionCount + assert(sessionCount == 0) + } + } + } + + test("get session event") { + withSessionRestApi { sessionRestApi => + // open session + val sessionOpenRequest = new SessionOpenRequest(Map("testConfig" -> "testValue").asJava) + val sessionHandle = sessionRestApi.openSession(sessionOpenRequest) + + // get session event + val kyuubiEvent = sessionRestApi.getSessionEvent( + sessionHandle.getIdentifier.toString).asInstanceOf[dto.KyuubiSessionEvent] + assert(kyuubiEvent.getConf.get("testConfig").equals("testValue")) + assert(kyuubiEvent.getSessionType.equals(SessionType.INTERACTIVE.toString)) + } + } + + test("get info type") { + withSessionRestApi { sessionRestApi => + // open session + val sessionOpenRequest = new SessionOpenRequest( + Map("testConfig" -> "testValue", KyuubiConf.SERVER_INFO_PROVIDER.key -> "SERVER").asJava) + val sessionHandle = sessionRestApi.openSession(sessionOpenRequest) + + // get session info + val info = sessionRestApi.getSessionInfo( + sessionHandle.getIdentifier.toString, + TGetInfoType.CLI_SERVER_NAME.getValue) + assert(info.getInfoType.equals("CLI_SERVER_NAME")) + assert(info.getInfoValue.equals("Apache Kyuubi")) + } + } + + test("submit operation") { + withSessionRestApi { sessionRestApi => + // open session + val sessionOpenRequest = new SessionOpenRequest(Map("testConfig" -> "testValue").asJava) + val sessionHandle = sessionRestApi.openSession(sessionOpenRequest) + val sessionHandleStr = sessionHandle.getIdentifier.toString + + // execute statement + val op1 = sessionRestApi.executeStatement( + sessionHandleStr, + new StatementRequest("show tables", true, 3000)) + assert(op1.getIdentifier != null) + + // get type info + val op2 = sessionRestApi.getTypeInfo(sessionHandleStr) + assert(op2.getIdentifier != null) + + // get catalogs + val op3 = sessionRestApi.getCatalogs(sessionHandleStr) + assert(op3.getIdentifier != null) + + // get schemas + val op4 = sessionRestApi.getSchemas( + sessionHandleStr, + new GetSchemasRequest("spark_catalog", "default")) + assert(op4.getIdentifier != null) + + // get tables + val tableTypes = new util.ArrayList[String]() + val op5 = sessionRestApi.getTables( + sessionHandleStr, + new GetTablesRequest("spark_catalog", "default", "default", tableTypes)) + assert(op5.getIdentifier != null) + + // get table types + val op6 = sessionRestApi.getTableTypes(sessionHandleStr) + assert(op6.getIdentifier != null) + + // get columns + val op7 = sessionRestApi.getColumns( + sessionHandleStr, + new GetColumnsRequest("spark_catalog", "default", "default", "default")) + assert(op7.getIdentifier != null) + + // get function + val op8 = sessionRestApi.getFunctions( + sessionHandleStr, + new GetFunctionsRequest("default", "default", "default")) + assert(op8.getIdentifier != null) + + // get primary keys + assertThrows[KyuubiRestException] { + sessionRestApi.getPrimaryKeys( + sessionHandleStr, + new GetPrimaryKeysRequest("spark_catalog", "default", "default")) + } + + // get cross reference + val getCrossReferenceReq = new GetCrossReferenceRequest( + "spark_catalog", + "default", + "default", + "spark_catalog", + "default", + "default") + assertThrows[KyuubiRestException] { + sessionRestApi.getCrossReference(sessionHandleStr, getCrossReferenceReq) + } + } + } + + def withSessionRestApi[T](f: SessionRestApi => T): T = { val basicKyuubiRestClient: KyuubiRestClient = KyuubiRestClient.builder(baseUri.toString) .authHeaderMethod(KyuubiRestClient.AuthHeaderMethod.BASIC) @@ -39,12 +171,7 @@ class SessionRestApiSuite extends RestClientTestHelper { .password(ldapUserPasswd) .socketTimeout(30000) .build() - val sessionRestApi = new SessionRestApi(basicKyuubiRestClient) - val sessions = sessionRestApi.listSessions().asScala - assert(sessions.size == 1) - assert(sessions(0).getUser == "admin") - assert(sessions(0).getIpAddr == "localhost") - assert(sessions(0).getConf.toString == "{testConfig=testValue}") + f(sessionRestApi) } } diff --git a/kyuubi-server/src/test/scala/org/apache/kyuubi/server/trino/api/TrinoClientApiSuite.scala b/kyuubi-server/src/test/scala/org/apache/kyuubi/server/trino/api/TrinoClientApiSuite.scala new file mode 100644 index 00000000000..478bf917463 --- /dev/null +++ b/kyuubi-server/src/test/scala/org/apache/kyuubi/server/trino/api/TrinoClientApiSuite.scala @@ -0,0 +1,154 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.server.trino.api + +import java.net.URI +import java.time.ZoneId +import java.util.{Collections, Locale, Optional} +import java.util.concurrent.TimeUnit +import java.util.concurrent.atomic.AtomicReference + +import scala.annotation.tailrec +import scala.collection.JavaConverters._ + +import com.google.common.base.Verify +import io.airlift.units.Duration +import io.trino.client.{ClientSession, StatementClient, StatementClientFactory} +import okhttp3.OkHttpClient + +import org.apache.kyuubi.{KyuubiFunSuite, KyuubiSQLException, TrinoRestFrontendTestHelper} + +class TrinoClientApiSuite extends KyuubiFunSuite with TrinoRestFrontendTestHelper { + + private val httpClient = + new OkHttpClient.Builder() + .readTimeout(5, TimeUnit.MINUTES) + .build() + private lazy val clientSession = + new AtomicReference[ClientSession](createTestClientSession(baseUri)) + + test("submit query with trino client api") { + val trino = getTrinoStatementClient("select 1") + val result = execute(trino) + val sessionId = trino.getSetSessionProperties.asScala.get(Query.KYUUBI_SESSION_ID) + assert(result == List(List(1))) + + updateClientSession(trino) + + val trino1 = getTrinoStatementClient("set k=v") + val result1 = execute(trino1) + val sessionId1 = trino1.getSetSessionProperties.asScala.get(Query.KYUUBI_SESSION_ID) + assert(result1 == List(List("k", "v"))) + assert(sessionId == sessionId1) + + updateClientSession(trino) + + val trino2 = getTrinoStatementClient("set k") + val result2 = execute(trino2) + val sessionId2 = trino2.getSetSessionProperties.asScala.get(Query.KYUUBI_SESSION_ID) + assert(result2 == List(List("k", "v"))) + assert(sessionId == sessionId2) + + trino.close() + } + + private def updateClientSession(trino: StatementClient): Unit = { + val session = clientSession.get + + var builder = ClientSession.builder(session) + // update catalog and schema + if (trino.getSetCatalog.isPresent || trino.getSetSchema.isPresent) { + builder = builder + .withCatalog(trino.getSetCatalog.orElse(session.getCatalog)) + .withSchema(trino.getSetSchema.orElse(session.getSchema)) + } + + // update path if present + if (trino.getSetPath.isPresent) { + builder = builder.withPath(trino.getSetPath.get) + } + + // update session properties if present + if (!trino.getSetSessionProperties.isEmpty || !trino.getResetSessionProperties.isEmpty) { + val properties = session.getProperties.asScala.clone() + properties ++= trino.getSetSessionProperties.asScala + properties --= trino.getResetSessionProperties.asScala + builder = builder.withProperties(properties.asJava) + } + clientSession.set(builder.build()) + } + + private def execute(trino: StatementClient): List[List[Any]] = { + @tailrec + def getData(trino: StatementClient): (Boolean, List[List[Any]]) = { + if (trino.isRunning) { + val data = trino.currentData().getData() + trino.advance() + if (data != null) { + (true, data.asScala.toList.map(_.asScala.toList)) + } else { + getData(trino) + } + } else { + Verify.verify(trino.isFinished) + val finalStatus = trino.finalStatusInfo() + if (finalStatus.getError() != null) { + throw KyuubiSQLException( + s"Query ${finalStatus.getId} failed: ${finalStatus.getError.getMessage}") + } + (false, List[List[Any]]()) + } + } + + Iterator.continually(getData(trino)).takeWhile(_._1).flatMap(_._2).toList + } + + private def getTrinoStatementClient(sql: String): StatementClient = { + StatementClientFactory.newStatementClient(httpClient, clientSession.get, sql) + } + + private def createTestClientSession(connectUrl: URI): ClientSession = { + new ClientSession( + connectUrl, + "kyuubi_test", + Optional.of("test_user"), + "kyuubi", + Optional.of("test_token_tracing"), + Set[String]().asJava, + "test_client_info", + "test_catalog", + "test_schema", + null, + ZoneId.systemDefault(), + Locale.getDefault, + Collections.emptyMap(), + Map[String, String]( + "test_property_key0" -> "test_property_value0", + "test_property_key1" -> "test_propert_value1").asJava, + Map[String, String]( + "test_statement_key0" -> "select 1", + "test_statement_key1" -> "select 2").asJava, + Collections.emptyMap(), + Collections.emptyMap(), + null, + new Duration(2, TimeUnit.MINUTES), + true) + + } + +} diff --git a/kyuubi-server/src/test/scala/org/apache/kyuubi/server/trino/api/TrinoContextSuite.scala b/kyuubi-server/src/test/scala/org/apache/kyuubi/server/trino/api/TrinoContextSuite.scala index 67a502288ec..87c8eda968a 100644 --- a/kyuubi-server/src/test/scala/org/apache/kyuubi/server/trino/api/TrinoContextSuite.scala +++ b/kyuubi-server/src/test/scala/org/apache/kyuubi/server/trino/api/TrinoContextSuite.scala @@ -17,13 +17,24 @@ package org.apache.kyuubi.server.trino.api +import java.net.URI import java.time.ZoneId +import javax.ws.rs.core.MediaType + +import scala.collection.JavaConverters._ import io.trino.client.ProtocolHeaders.TRINO_HEADERS +import org.apache.hive.service.rpc.thrift.TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V9 +import org.scalatest.concurrent.PatienceConfiguration.Timeout +import org.scalatest.time.SpanSugar.convertIntToGrainOfTime + +import org.apache.kyuubi.{KyuubiFunSuite, RestFrontendTestHelper} +import org.apache.kyuubi.events.KyuubiOperationEvent +import org.apache.kyuubi.operation.{FetchOrientation, OperationHandle} +import org.apache.kyuubi.operation.OperationState.{FINISHED, OperationState} -import org.apache.kyuubi.KyuubiFunSuite +class TrinoContextSuite extends KyuubiFunSuite with RestFrontendTestHelper { -class TrinoContextSuite extends KyuubiFunSuite { import TrinoContext._ test("create trino request context with header") { @@ -67,4 +78,83 @@ class TrinoContextSuite extends KyuubiFunSuite { assert(actual == expectedTrinoContext) } + test("test convert") { + val opHandle = getOpHandle("select 1") + val opHandleStr = opHandle.identifier.toString + checkOpState(opHandleStr, FINISHED) + + val metadataResp = fe.be.getResultSetMetadata(opHandle) + val tRowSet = fe.be.fetchResults(opHandle, FetchOrientation.FETCH_NEXT, 1000, false) + val status = fe.be.getOperationStatus(opHandle, Some(0)) + + val uri = new URI("sfdsfsdfdsf") + val results = TrinoContext + .createQueryResults("/xdfd/xdf", uri, uri, status, Option(metadataResp), Option(tRowSet)) + + print(results.toString) + assert(results.getColumns.get(0).getType.equals("integer")) + assert(results.getData.asScala.last.get(0) == 1) + } + + test("test convert from table") { + initSql("CREATE DATABASE IF NOT EXISTS INIT_DB") + initSql( + "CREATE TABLE IF NOT EXISTS INIT_DB.test(a int, b double, c String," + + "d BOOLEAN,e DATE,f TIMESTAMP,g ARRAY,h DECIMAL," + + "i MAP) USING PARQUET;") + initSql( + "INSERT INTO INIT_DB.test VALUES (1,2.2,'3',true,current_date()," + + "current_timestamp(),array('1','2'),2.0, map('m','p') )") + + val opHandle = getOpHandle("SELECT * FROM INIT_DB.test") + val opHandleStr = opHandle.identifier.toString + checkOpState(opHandleStr, FINISHED) + + val metadataResp = fe.be.getResultSetMetadata(opHandle) + val tRowSet = fe.be.fetchResults(opHandle, FetchOrientation.FETCH_NEXT, 1000, false) + val status = fe.be.getOperationStatus(opHandle, Some(0)) + + val uri = new URI("sfdsfsdfdsf") + val results = TrinoContext + .createQueryResults("/xdfd/xdf", uri, uri, status, Option(metadataResp), Option(tRowSet)) + + print(results.toString) + assert(results.getColumns.get(0).getType.equals("integer")) + assert(results.getData.asScala.last.get(0) != null) + } + + def getOpHandleStr(statement: String = "show tables"): String = { + getOpHandle(statement).identifier.toString + } + + def getOpHandle(statement: String = "show tables"): OperationHandle = { + val sessionHandle = fe.be.openSession( + HIVE_CLI_SERVICE_PROTOCOL_V9, + "admin", + "123456", + "localhost", + Map("testConfig" -> "testValue")) + + if (statement.nonEmpty) { + fe.be.executeStatement(sessionHandle, statement, Map.empty, runAsync = false, 30000) + } else { + fe.be.getCatalogs(sessionHandle) + } + } + + private def checkOpState(opHandleStr: String, state: OperationState): Unit = { + eventually(Timeout(30.seconds)) { + val response = webTarget.path(s"api/v1/operations/$opHandleStr/event") + .request(MediaType.APPLICATION_JSON_TYPE).get() + assert(response.getStatus === 200) + val operationEvent = response.readEntity(classOf[KyuubiOperationEvent]) + assert(operationEvent.state === state.name()) + } + } + + private def initSql(sql: String): Unit = { + val initOpHandle = getOpHandle(sql) + val initOpHandleStr = initOpHandle.identifier.toString + checkOpState(initOpHandleStr, FINISHED) + } } diff --git a/kyuubi-server/src/test/scala/org/apache/kyuubi/server/trino/api/v1/StatementResourceSuite.scala b/kyuubi-server/src/test/scala/org/apache/kyuubi/server/trino/api/v1/StatementResourceSuite.scala index b60c7c67aa2..44602759c21 100644 --- a/kyuubi-server/src/test/scala/org/apache/kyuubi/server/trino/api/v1/StatementResourceSuite.scala +++ b/kyuubi-server/src/test/scala/org/apache/kyuubi/server/trino/api/v1/StatementResourceSuite.scala @@ -17,15 +17,26 @@ package org.apache.kyuubi.server.trino.api.v1 -import org.apache.kyuubi.{KyuubiFunSuite, RestFrontendTestHelper} -import org.apache.kyuubi.config.KyuubiConf.FrontendProtocols -import org.apache.kyuubi.config.KyuubiConf.FrontendProtocols.FrontendProtocol +import javax.ws.rs.client.Entity +import javax.ws.rs.core.{MediaType, Response} + +import scala.collection.JavaConverters._ + +import io.trino.client.{QueryError, QueryResults} +import io.trino.client.ProtocolHeaders.TRINO_HEADERS + +import org.apache.kyuubi.{KyuubiFunSuite, KyuubiSQLException, TrinoRestFrontendTestHelper} +import org.apache.kyuubi.server.trino.api.{Query, TrinoContext} import org.apache.kyuubi.server.trino.api.v1.dto.Ok +import org.apache.kyuubi.session.SessionHandle -class StatementResourceSuite extends KyuubiFunSuite with RestFrontendTestHelper { +class StatementResourceSuite extends KyuubiFunSuite with TrinoRestFrontendTestHelper { - override protected val frontendProtocols: Seq[FrontendProtocol] = - FrontendProtocols.TRINO :: Nil + case class TrinoResponse( + response: Option[Response] = None, + queryError: Option[QueryError] = None, + data: List[List[Any]] = List[List[Any]](), + isEnd: Boolean = false) test("statement test") { val response = webTarget.path("v1/statement/test").request().get() @@ -33,4 +44,72 @@ class StatementResourceSuite extends KyuubiFunSuite with RestFrontendTestHelper assert(result == new Ok("trino server is running")) } + test("statement submit for query error") { + + val response = webTarget.path("v1/statement") + .request().post(Entity.entity("select a", MediaType.TEXT_PLAIN_TYPE)) + + val trinoResponseIter = Iterator.iterate(TrinoResponse(response = Option(response)))(getData) + val isErr = trinoResponseIter.takeWhile(_.isEnd == false).exists { t => + t.queryError != None && t.response == None + } + assert(isErr == true) + } + + test("statement submit and get result") { + val response = webTarget.path("v1/statement") + .request().post(Entity.entity("select 1", MediaType.TEXT_PLAIN_TYPE)) + + val trinoResponseIter = Iterator.iterate(TrinoResponse(response = Option(response)))(getData) + val dataSet = trinoResponseIter + .takeWhile(_.isEnd == false) + .map(_.data) + .flatten.toList + assert(dataSet == List(List(1))) + } + + test("query cancel") { + val response = webTarget.path("v1/statement") + .request().post(Entity.entity("select 1", MediaType.TEXT_PLAIN_TYPE)) + assert(response.getStatus == 200) + val qr = response.readEntity(classOf[QueryResults]) + val sessionManager = fe.be.sessionManager + val sessionHandle = + response.getStringHeaders.get(TRINO_HEADERS.responseSetSession).asScala + .map(_.split("=")) + .find { + case Array(Query.KYUUBI_SESSION_ID, _) => true + } + .map { + case Array(_, value) => SessionHandle.fromUUID(TrinoContext.urlDecode(value)) + }.get + sessionManager.getSession(sessionHandle) + + val path = qr.getNextUri.getPath + val nextResponse = webTarget.path(path).request().header( + TRINO_HEADERS.requestSession(), + s"${Query.KYUUBI_SESSION_ID}=${TrinoContext.urlEncode(sessionHandle.identifier.toString)}") + .delete() + assert(nextResponse.getStatus == 204) + val exception = intercept[KyuubiSQLException](sessionManager.getSession(sessionHandle)) + assert(exception.getMessage === s"Invalid $sessionHandle") + } + + private def getData(current: TrinoResponse): TrinoResponse = { + current.response.map { response => + assert(response.getStatus == 200) + val qr = response.readEntity(classOf[QueryResults]) + val nextData = Option(qr.getData) + .map(_.asScala.toList.map(_.asScala.toList)) + .getOrElse(List[List[Any]]()) + val nextResponse = Option(qr.getNextUri).map { + uri => + val path = uri.getPath + val headers = response.getHeaders + webTarget.path(path).request().headers(headers).get() + } + TrinoResponse(nextResponse, Option(qr.getError), nextData) + }.getOrElse(TrinoResponse(isEnd = true)) + } + } diff --git a/kyuubi-server/web-ui/.env.development b/kyuubi-server/web-ui/.env.development index d8297cf3624..d1d91dd384d 100644 --- a/kyuubi-server/web-ui/.env.development +++ b/kyuubi-server/web-ui/.env.development @@ -15,4 +15,4 @@ NODE_ENV=development -VITE_APP_DEV_WEB_URL='/' +VITE_APP_DEV_WEB_URL='http://0.0.0.0:10099/' diff --git a/kyuubi-server/web-ui/.eslintrc b/kyuubi-server/web-ui/.eslintrc index ebbf401995e..f2bff2cd6e3 100644 --- a/kyuubi-server/web-ui/.eslintrc +++ b/kyuubi-server/web-ui/.eslintrc @@ -69,6 +69,9 @@ "exports": "never", "functions": "never" }], + "prettier/prettier": ["error", { + "bracketSameLine": true + }], "vue/multi-word-component-names": "off", "vue/component-definition-name-casing": "off", "vue/require-valid-default-prop": "off", diff --git a/kyuubi-server/web-ui/.gitignore b/kyuubi-server/web-ui/.gitignore index be5bdb2366b..c6cab4b869c 100644 --- a/kyuubi-server/web-ui/.gitignore +++ b/kyuubi-server/web-ui/.gitignore @@ -14,6 +14,7 @@ # limitations under the License. .DS_Store +node node_modules /dist /coverage diff --git a/kyuubi-server/web-ui/.prettierrc b/kyuubi-server/web-ui/.prettierrc index 1fceefb9885..01db7f49bc1 100644 --- a/kyuubi-server/web-ui/.prettierrc +++ b/kyuubi-server/web-ui/.prettierrc @@ -4,7 +4,7 @@ "vueIndentScriptAndStyle": true, "singleQuote": true, "quoteProps": "as-needed", - "jsxBracketSameLine": false, + "bracketSameLine": true, "jsxSingleQuote": true, "arrowParens": "always", "htmlWhitespaceSensitivity": "strict", diff --git a/kyuubi-server/web-ui/README.md b/kyuubi-server/web-ui/README.md index cc5654b231e..b892a690261 100644 --- a/kyuubi-server/web-ui/README.md +++ b/kyuubi-server/web-ui/README.md @@ -15,8 +15,15 @@ npm install ### Development Project -To do this you can change the VITE_APP_DEV_WEB_URL parameter variable as the service url in `.env.development` in the project root directory, such as http://127.0. 0.1:8090 +Notice: +Before you start the Web UI project, please make sure the Kyuubi server has been started. + +Kyuubi Web UI will proxy the requests to Kyuubi server, with the default endpoint path to`http://localhost:10099`. Modify `VITE_APP_DEV_WEB_URL` in `.env.development` for customizing targeted endpoint path. + +#### Why proxy to http://localhost:10099 + +Currently kyuubi server binds on `http://0.0.0.0:10099` in case your are running kyuubi server in MacOS or Windows(If in linux, you should config kyuubi server `kyuubi.frontend.rest.bind.host=0.0.0.0`, or change `VITE_APP_DEV_WEB_URL` in `.env.development`). ```shell npm run dev @@ -56,3 +63,4 @@ pnpm run build # Code Format pnpm run prettier ``` + diff --git a/kyuubi-server/web-ui/index.html b/kyuubi-server/web-ui/index.html index bd4f506721c..2c4579eb0de 100644 --- a/kyuubi-server/web-ui/index.html +++ b/kyuubi-server/web-ui/index.html @@ -22,7 +22,7 @@ - Vite + Vue + TS + Apache Kyuubi Dashboard
                diff --git a/kyuubi-server/web-ui/package-lock.json b/kyuubi-server/web-ui/package-lock.json index 77a3991e5ad..0a2feeba118 100644 --- a/kyuubi-server/web-ui/package-lock.json +++ b/kyuubi-server/web-ui/package-lock.json @@ -1,12 +1,12 @@ { "name": "kyuubi-ui", - "version": "1.7.0-SNAPSHOT", + "version": "1.8.0-SNAPSHOT", "lockfileVersion": 2, "requires": true, "packages": { "": { "name": "kyuubi-ui", - "version": "1.7.0-SNAPSHOT", + "version": "1.8.0-SNAPSHOT", "dependencies": { "@element-plus/icons-vue": "^2.0.9", "axios": "^0.27.2", diff --git a/kyuubi-server/web-ui/package.json b/kyuubi-server/web-ui/package.json index 2ae37cdb99f..131e69b7f71 100644 --- a/kyuubi-server/web-ui/package.json +++ b/kyuubi-server/web-ui/package.json @@ -1,7 +1,7 @@ { "name": "kyuubi-ui", "private": true, - "version": "1.7.0-SNAPSHOT", + "version": "1.8.0-SNAPSHOT", "type": "module", "scripts": { "dev": "vue-tsc --noEmit && vite --port 9090", @@ -17,6 +17,7 @@ "dependencies": { "@element-plus/icons-vue": "^2.0.9", "axios": "^0.27.2", + "date-fns": "^2.29.3", "element-plus": "^2.2.12", "pinia": "^2.0.18", "pinia-plugin-persistedstate": "^2.1.1", diff --git a/kyuubi-server/web-ui/pnpm-lock.yaml b/kyuubi-server/web-ui/pnpm-lock.yaml index 61fc5124dbe..1926352abe6 100644 --- a/kyuubi-server/web-ui/pnpm-lock.yaml +++ b/kyuubi-server/web-ui/pnpm-lock.yaml @@ -12,6 +12,7 @@ specifiers: '@vue/eslint-config-typescript': ^11.0.0 '@vue/test-utils': ^2.0.2 axios: ^0.27.2 + date-fns: ^2.29.3 element-plus: ^2.2.12 eslint: ^8.21.0 eslint-plugin-prettier: ^4.2.1 @@ -32,6 +33,7 @@ specifiers: dependencies: '@element-plus/icons-vue': 2.0.9_vue@3.2.37 axios: 0.27.2 + date-fns: 2.29.3 element-plus: 2.2.13_vue@3.2.37 pinia: 2.0.18_j6bzmzd4ujpabbp5objtwxyjp4 pinia-plugin-persistedstate: 2.1.1_pinia@2.0.18 @@ -907,6 +909,11 @@ packages: whatwg-url: 11.0.0 dev: true + /date-fns/2.29.3: + resolution: {integrity: sha512-dDCnyH2WnnKusqvZZ6+jA1O51Ibt8ZMRNkDZdyAyK4YfbDwa/cEmuztzG5pk6hqlp9aSBPYcjOlktquahGwGeA==} + engines: {node: '>=0.11'} + dev: false + /dayjs/1.11.5: resolution: {integrity: sha512-CAdX5Q3YW3Gclyo5Vpqkgpj8fSdLQcRuzfX6mC6Phy0nfJ0eGYOeS7m4mt2plDWLAtA4TqTakvbboHvUxfe4iA==} dev: false diff --git a/kyuubi-server/web-ui/src/api/session/index.ts b/kyuubi-server/web-ui/src/api/session/index.ts new file mode 100644 index 00000000000..6af5a817f30 --- /dev/null +++ b/kyuubi-server/web-ui/src/api/session/index.ts @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import request from '@/utils/request' + +export function getAllSessions() { + return request({ + url: 'api/v1/sessions', + method: 'get' + }) +} + +export function deleteSession(sessionId: string) { + return request({ + url: `api/v1/sessions/${sessionId}`, + method: 'delete' + }) +} diff --git a/kyuubi-server/web-ui/src/components/menu/index.vue b/kyuubi-server/web-ui/src/components/menu/index.vue index b563b491ec8..d6d4d1b56f1 100644 --- a/kyuubi-server/web-ui/src/components/menu/index.vue +++ b/kyuubi-server/web-ui/src/components/menu/index.vue @@ -21,14 +21,12 @@ class="el-menu-container" :collapse="isCollapse" :default-active="activePath" - :router="true" - > + :router="true">