From 0ca649de5e917fe745aef705f69932be66ae74bb Mon Sep 17 00:00:00 2001 From: Hyunsik Choi Date: Wed, 7 May 2014 18:57:42 +0900 Subject: [PATCH 01/21] TAJO-814: Set up Travis CI builds. --- .travis.yml | 24 ++++++++++++++++++++++++ CHANGES | 2 ++ 2 files changed, 26 insertions(+) create mode 100644 .travis.yml diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000000..0a3de39958 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,24 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +language: java + +jdk: + - openjdk6 + +install: + - wget https://protobuf.googlecode.com/files/protobuf-2.5.0.tar.gz -O /tmp/protobuf-2.5.0.tar.gz + - tar -xzvf /tmp/protobuf-2.5.0.tar.gz + - cd protobuf-2.5.0 && ./configure --prefix=/usr && make && sudo make install diff --git a/CHANGES b/CHANGES index 8a2e891bef..7a59ec1fe3 100644 --- a/CHANGES +++ b/CHANGES @@ -52,6 +52,8 @@ Release 0.9.0 - unreleased TASKS + TAJO-814: Set up Travis CI builds. (hyunsik) + TAJO-810: TAJO-810: Update Tajo site for 0.8.0 release. (hyunsik) TAJO-605: Rename Options to KeyValueList. (jinho) From 167e6bb177ec368853c015beaf17443378ca3091 Mon Sep 17 00:00:00 2001 From: Hyunsik Choi Date: Wed, 7 May 2014 19:39:38 +0900 Subject: [PATCH 02/21] Changed git depth. 
--- .travis.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.travis.yml b/.travis.yml index 0a3de39958..7b1a17b789 100644 --- a/.travis.yml +++ b/.travis.yml @@ -15,6 +15,9 @@ language: java +git: + depth: 150 + jdk: - openjdk6 From c21f338644171eee66d818113882aa4a64e98f4d Mon Sep 17 00:00:00 2001 From: Hyunsik Choi Date: Wed, 7 May 2014 20:52:05 +0900 Subject: [PATCH 03/21] Add script line. --- .travis.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.travis.yml b/.travis.yml index 7b1a17b789..0ab1ae419e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -25,3 +25,5 @@ install: - wget https://protobuf.googlecode.com/files/protobuf-2.5.0.tar.gz -O /tmp/protobuf-2.5.0.tar.gz - tar -xzvf /tmp/protobuf-2.5.0.tar.gz - cd protobuf-2.5.0 && ./configure --prefix=/usr && make && sudo make install + +script: mvn clean install -Phcatalog-0.12.0 From 466290fbdd27d4d95cf7951b7ab8d01493422ccc Mon Sep 17 00:00:00 2001 From: Hyunsik Choi Date: Wed, 7 May 2014 20:52:30 +0900 Subject: [PATCH 04/21] set install true. 
--- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 0ab1ae419e..c1da26f6a7 100644 --- a/.travis.yml +++ b/.travis.yml @@ -21,7 +21,7 @@ git: jdk: - openjdk6 -install: +install: true - wget https://protobuf.googlecode.com/files/protobuf-2.5.0.tar.gz -O /tmp/protobuf-2.5.0.tar.gz - tar -xzvf /tmp/protobuf-2.5.0.tar.gz - cd protobuf-2.5.0 && ./configure --prefix=/usr && make && sudo make install From 9c48900d05dae4078178c7d8cf6f38e21dc40134 Mon Sep 17 00:00:00 2001 From: Hyunsik Choi Date: Wed, 7 May 2014 21:00:34 +0900 Subject: [PATCH 05/21] removed 'true' --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index c1da26f6a7..53f676d087 100644 --- a/.travis.yml +++ b/.travis.yml @@ -21,7 +21,7 @@ git: jdk: - openjdk6 -install: true +install: - wget https://protobuf.googlecode.com/files/protobuf-2.5.0.tar.gz -O /tmp/protobuf-2.5.0.tar.gz - tar -xzvf /tmp/protobuf-2.5.0.tar.gz - cd protobuf-2.5.0 && ./configure --prefix=/usr && make && sudo make install From 06bc7caf9477d0b72db498f46302a8a3a1cbec8f Mon Sep 17 00:00:00 2001 From: Hyunsik Choi Date: Wed, 7 May 2014 23:10:44 +0900 Subject: [PATCH 06/21] Add travis-build.sh. 
--- .travis.yml | 10 ++++----- dev-support/travis-build.sh | 42 +++++++++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+), 5 deletions(-) create mode 100755 dev-support/travis-build.sh diff --git a/.travis.yml b/.travis.yml index 53f676d087..d193f7edc2 100644 --- a/.travis.yml +++ b/.travis.yml @@ -21,9 +21,9 @@ git: jdk: - openjdk6 -install: - - wget https://protobuf.googlecode.com/files/protobuf-2.5.0.tar.gz -O /tmp/protobuf-2.5.0.tar.gz - - tar -xzvf /tmp/protobuf-2.5.0.tar.gz - - cd protobuf-2.5.0 && ./configure --prefix=/usr && make && sudo make install +cache: + directories: + - $HOME/.m2 + - $HOME/local -script: mvn clean install -Phcatalog-0.12.0 +script: dev-support/travis-build.sh diff --git a/dev-support/travis-build.sh b/dev-support/travis-build.sh new file mode 100755 index 0000000000..736cd4783f --- /dev/null +++ b/dev-support/travis-build.sh @@ -0,0 +1,42 @@ +#!/bin/bash -x + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +PROTOBUF_VERSION=2.5.0 +SRC=`pwd` +INSTALL=${HOME}/local + +if [ ! 
-f ${INSTALL}/bin/protoc ] +then + + cd ${INSTALL} + echo "Fetching protobuf" + N="protobuf-${PROTOBUF_VERSION}" + wget https://protobuf.googlecode.com/files/${N}.tar.gz + tar -xzvf ${N}.tar.gz + rm ${N}.tar.gz + + echo "Building protobuf" + cd ${N} + ./configure --with-pic --prefix=${INSTALL} --with-gflags=${INSTALL} + make -j4 install +fi + +cd ${SRC} + +echo "mvn clean install -Phcatalog-0.12.0" +mvn clean install -Phcatalog-0.12.0 From 52e70306fa952f780a5bc8156c78ea8043c1fe78 Mon Sep 17 00:00:00 2001 From: Hyunsik Choi Date: Wed, 7 May 2014 23:20:40 +0900 Subject: [PATCH 07/21] Improved yml and rename travis-build.sh. --- .travis.yml | 4 +++- ...avis-build.sh => travis-install-dependencies.sh} | 13 +++++-------- 2 files changed, 8 insertions(+), 9 deletions(-) rename dev-support/{travis-build.sh => travis-install-dependencies.sh} (89%) diff --git a/.travis.yml b/.travis.yml index d193f7edc2..3d9a84d20b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -26,4 +26,6 @@ cache: - $HOME/.m2 - $HOME/local -script: dev-support/travis-build.sh +install: ./dev-support/travis-install-dependencies.sh + +script: mvn clean install -Phcatalog-0.12.0 diff --git a/dev-support/travis-build.sh b/dev-support/travis-install-dependencies.sh similarity index 89% rename from dev-support/travis-build.sh rename to dev-support/travis-install-dependencies.sh index 736cd4783f..59b10d3f18 100755 --- a/dev-support/travis-build.sh +++ b/dev-support/travis-install-dependencies.sh @@ -17,12 +17,14 @@ # limitations under the License. PROTOBUF_VERSION=2.5.0 -SRC=`pwd` INSTALL=${HOME}/local -if [ ! -f ${INSTALL}/bin/protoc ] -then +if [ ! -d ${INSTALL} ]; then + echo "mkdir -p ${INSTALL}" + mkdir -p ${INSTALL} +fi +if [ ! 
-f ${INSTALL}/bin/protoc ]; then cd ${INSTALL} echo "Fetching protobuf" N="protobuf-${PROTOBUF_VERSION}" @@ -35,8 +37,3 @@ then ./configure --with-pic --prefix=${INSTALL} --with-gflags=${INSTALL} make -j4 install fi - -cd ${SRC} - -echo "mvn clean install -Phcatalog-0.12.0" -mvn clean install -Phcatalog-0.12.0 From e2004d7f157dbd14e7da20718089c0da24a1fe1b Mon Sep 17 00:00:00 2001 From: Hyunsik Choi Date: Wed, 7 May 2014 23:31:14 +0900 Subject: [PATCH 08/21] Add path for protoc. --- .travis.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 3d9a84d20b..228586ad23 100644 --- a/.travis.yml +++ b/.travis.yml @@ -21,10 +21,11 @@ git: jdk: - openjdk6 +env: PATH=$PATH:$HOME/local/bin + cache: directories: - $HOME/.m2 - - $HOME/local install: ./dev-support/travis-install-dependencies.sh From 446510393a43b309b9252f84cd77007ebfe5a38b Mon Sep 17 00:00:00 2001 From: Hyunsik Choi Date: Thu, 8 May 2014 00:20:35 +0900 Subject: [PATCH 09/21] Fixed the bug which results in empty disk list. 
--- .travis.yml | 1 + .../src/main/java/org/apache/tajo/storage/v2/DiskUtil.java | 2 +- .../main/java/org/apache/tajo/storage/v2/ScanScheduler.java | 3 +++ 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 228586ad23..4ab0aa357c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -26,6 +26,7 @@ env: PATH=$PATH:$HOME/local/bin cache: directories: - $HOME/.m2 + - $HOME/local install: ./dev-support/travis-install-dependencies.sh diff --git a/tajo-storage/src/main/java/org/apache/tajo/storage/v2/DiskUtil.java b/tajo-storage/src/main/java/org/apache/tajo/storage/v2/DiskUtil.java index d5873bb3ce..66827c2649 100644 --- a/tajo-storage/src/main/java/org/apache/tajo/storage/v2/DiskUtil.java +++ b/tajo-storage/src/main/java/org/apache/tajo/storage/v2/DiskUtil.java @@ -133,7 +133,7 @@ private static String getDiskDeviceName(String partitionName) { return new String(result, 0, length); } - private static List getDefaultDiskDeviceInfos() { + public static List getDefaultDiskDeviceInfos() { DiskDeviceInfo diskDeviceInfo = new DiskDeviceInfo(0); diskDeviceInfo.setName("default"); diff --git a/tajo-storage/src/main/java/org/apache/tajo/storage/v2/ScanScheduler.java b/tajo-storage/src/main/java/org/apache/tajo/storage/v2/ScanScheduler.java index 71c4d93702..fe852a0fc7 100644 --- a/tajo-storage/src/main/java/org/apache/tajo/storage/v2/ScanScheduler.java +++ b/tajo-storage/src/main/java/org/apache/tajo/storage/v2/ScanScheduler.java @@ -53,6 +53,9 @@ public ScanScheduler(StorgaeManagerContext context) { try { List deviceInfos = DiskUtil.getDiskDeviceInfos(); + if (deviceInfos.size() == 0) { + deviceInfos = DiskUtil.getDefaultDiskDeviceInfos(); + } for(DiskDeviceInfo eachInfo: deviceInfos) { LOG.info("Create DiskScanQueue:" + eachInfo.getName()); diskDeviceInfoMap.put(eachInfo.getId(), eachInfo); From 3831687f307021df238860a0a32aeb8e36e87838 Mon Sep 17 00:00:00 2001 From: Hyunsik Choi Date: Thu, 8 May 2014 00:49:41 +0900 Subject: [PATCH 10/21] Add 
rediction of protobuf compilation to /dev/null. --- dev-support/travis-install-dependencies.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dev-support/travis-install-dependencies.sh b/dev-support/travis-install-dependencies.sh index 59b10d3f18..06fe61a555 100755 --- a/dev-support/travis-install-dependencies.sh +++ b/dev-support/travis-install-dependencies.sh @@ -34,6 +34,6 @@ if [ ! -f ${INSTALL}/bin/protoc ]; then echo "Building protobuf" cd ${N} - ./configure --with-pic --prefix=${INSTALL} --with-gflags=${INSTALL} - make -j4 install + ./configure --with-pic --prefix=${INSTALL} --with-gflags=${INSTALL} > /dev/null + make -j4 install > /dev/null fi From c97af4ea6062b1ce702fbc4fc78e16d25ba7291f Mon Sep 17 00:00:00 2001 From: Hyunsik Choi Date: Thu, 8 May 2014 00:52:48 +0900 Subject: [PATCH 11/21] Add quite option to maven. --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 4ab0aa357c..8a130276f2 100644 --- a/.travis.yml +++ b/.travis.yml @@ -30,4 +30,4 @@ cache: install: ./dev-support/travis-install-dependencies.sh -script: mvn clean install -Phcatalog-0.12.0 +script: mvn clean install -Phcatalog-0.12.0 -q From 229f9aca5e7f73c116073396feed1e4c864e93ec Mon Sep 17 00:00:00 2001 From: Hyunsik Choi Date: Thu, 8 May 2014 01:25:58 +0900 Subject: [PATCH 12/21] Changed some logs into debug level. 
--- .../engine/planner/rewrite/BasicQueryRewriteEngine.java | 4 +++- .../engine/planner/rewrite/ProjectionPushDownRule.java | 1 - .../main/java/org/apache/tajo/master/GlobalEngine.java | 8 +++++--- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/rewrite/BasicQueryRewriteEngine.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/rewrite/BasicQueryRewriteEngine.java index 3b4b712f89..6b3ed1e80f 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/rewrite/BasicQueryRewriteEngine.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/rewrite/BasicQueryRewriteEngine.java @@ -61,7 +61,9 @@ public LogicalPlan rewrite(LogicalPlan plan) throws PlanningException { rule = rewriteRule.getValue(); if (rule.isEligible(plan)) { plan = rule.rewrite(plan); - LOG.info("The rule \"" + rule.getName() + " \" rewrites the query."); + if (LOG.isDebugEnabled()) { + LOG.debug("The rule \"" + rule.getName() + " \" rewrites the query."); + } } } diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/rewrite/ProjectionPushDownRule.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/rewrite/ProjectionPushDownRule.java index 668ed689ec..c21c0871c8 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/rewrite/ProjectionPushDownRule.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/rewrite/ProjectionPushDownRule.java @@ -55,7 +55,6 @@ public boolean isEligible(LogicalPlan plan) { LogicalNode toBeOptimized = plan.getRootBlock().getRoot(); if (PlannerUtil.checkIfDDLPlan(toBeOptimized) || !plan.getRootBlock().hasTableExpression()) { - LOG.info("This query skips the logical optimization step."); return false; } diff --git a/tajo-core/src/main/java/org/apache/tajo/master/GlobalEngine.java b/tajo-core/src/main/java/org/apache/tajo/master/GlobalEngine.java index c87ad84064..4155609793 100644 --- 
a/tajo-core/src/main/java/org/apache/tajo/master/GlobalEngine.java +++ b/tajo-core/src/main/java/org/apache/tajo/master/GlobalEngine.java @@ -339,9 +339,11 @@ private LogicalPlan createLogicalPlan(Session session, Expr expression) throws P } LogicalPlan plan = planner.createPlan(session, expression); - LOG.info("============================================="); - LOG.info("Non Optimized Query: \n" + plan.toString()); - LOG.info("============================================="); + if (LOG.isDebugEnabled()) { + LOG.debug("============================================="); + LOG.debug("Non Optimized Query: \n" + plan.toString()); + LOG.debug("============================================="); + } optimizer.optimize(plan); LOG.info("============================================="); LOG.info("Optimized Query: \n" + plan.toString()); From d00564bab8aa32000c7713429672e37188d058c2 Mon Sep 17 00:00:00 2001 From: Hyunsik Choi Date: Thu, 8 May 2014 01:57:01 +0900 Subject: [PATCH 13/21] redirection from stdout to /dev/null. --- .travis.yml | 2 +- dev-support/travis-install-dependencies.sh | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index 8a130276f2..4a62fdc756 100644 --- a/.travis.yml +++ b/.travis.yml @@ -30,4 +30,4 @@ cache: install: ./dev-support/travis-install-dependencies.sh -script: mvn clean install -Phcatalog-0.12.0 -q +script: mvn clean install -Phcatalog-0.12.0 -q > /dev/null diff --git a/dev-support/travis-install-dependencies.sh b/dev-support/travis-install-dependencies.sh index 06fe61a555..2f77ffad6a 100755 --- a/dev-support/travis-install-dependencies.sh +++ b/dev-support/travis-install-dependencies.sh @@ -28,8 +28,8 @@ if [ ! 
-f ${INSTALL}/bin/protoc ]; then cd ${INSTALL} echo "Fetching protobuf" N="protobuf-${PROTOBUF_VERSION}" - wget https://protobuf.googlecode.com/files/${N}.tar.gz - tar -xzvf ${N}.tar.gz + wget -q https://protobuf.googlecode.com/files/${N}.tar.gz + tar -xzvf ${N}.tar.gz > /dev/null rm ${N}.tar.gz echo "Building protobuf" From d4906b39a0cafb82ed918e75ff9d1e411e91fdca Mon Sep 17 00:00:00 2001 From: Hyunsik Choi Date: Thu, 8 May 2014 02:46:50 +0900 Subject: [PATCH 14/21] Changed log4j for test and reduced logs. --- .travis.yml | 2 +- .../tajo-catalog-server/src/test/resources/log4j.properties | 2 +- tajo-client/src/main/resources/log4j.properties | 4 ++-- tajo-common/src/main/java/log4j.properties | 2 +- tajo-common/src/test/java/log4j.properties | 2 +- tajo-core/src/main/java/log4j.properties | 4 ++-- tajo-core/src/main/resources/log4j.properties | 4 ++-- tajo-core/src/test/java/log4j.properties | 4 ++-- tajo-core/src/test/resources/log4j.properties | 4 ++-- tajo-jdbc/src/main/resources/log4j.properties | 4 ++-- tajo-rpc/src/test/java/log4j.properties | 2 +- 11 files changed, 17 insertions(+), 17 deletions(-) diff --git a/.travis.yml b/.travis.yml index 4a62fdc756..e509f809d8 100644 --- a/.travis.yml +++ b/.travis.yml @@ -30,4 +30,4 @@ cache: install: ./dev-support/travis-install-dependencies.sh -script: mvn clean install -Phcatalog-0.12.0 -q > /dev/null +script: mvn clean install -Phcatalog-0.12.0 -q | grep -v 'INFO:' diff --git a/tajo-catalog/tajo-catalog-server/src/test/resources/log4j.properties b/tajo-catalog/tajo-catalog-server/src/test/resources/log4j.properties index 2b429757ad..237ceafb16 100644 --- a/tajo-catalog/tajo-catalog-server/src/test/resources/log4j.properties +++ b/tajo-catalog/tajo-catalog-server/src/test/resources/log4j.properties @@ -22,4 +22,4 @@ log4j.rootLogger=info,stdout log4j.threshhold=INFO log4j.appender.stdout=org.apache.log4j.ConsoleAppender log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 
-log4j.appender.stdout.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n +log4j.appender.stdout.layout.ConversionPattern=%d{ISO8601} %p: %c (%M(%L)) - %m%n diff --git a/tajo-client/src/main/resources/log4j.properties b/tajo-client/src/main/resources/log4j.properties index 11d9ad2d10..ef171971b7 100644 --- a/tajo-client/src/main/resources/log4j.properties +++ b/tajo-client/src/main/resources/log4j.properties @@ -22,6 +22,6 @@ log4j.rootLogger=info,stdout log4j.threshhold=INFO log4j.appender.stdout=org.apache.log4j.ConsoleAppender log4j.appender.stdout.layout=org.apache.log4j.PatternLayout -log4j.appender.stdout.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n +log4j.appender.stdout.layout.ConversionPattern=%d{ISO8601} %p: %c (%M(%L)) - %m%n -log4j.logger.org.apache.tajo.cli=FATAL \ No newline at end of file +log4j.logger.org.apache.tajo.cli=FATAL diff --git a/tajo-common/src/main/java/log4j.properties b/tajo-common/src/main/java/log4j.properties index 29cb6a84c0..8af8ce352e 100644 --- a/tajo-common/src/main/java/log4j.properties +++ b/tajo-common/src/main/java/log4j.properties @@ -16,4 +16,4 @@ log4j.rootLogger=info,stdout log4j.threshhold=ALL log4j.appender.stdout=org.apache.log4j.ConsoleAppender log4j.appender.stdout.layout=org.apache.log4j.PatternLayout -log4j.appender.stdout.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n +log4j.appender.stdout.layout.ConversionPattern=%d{ISO8601} %p: %c (%M(%L)) - %m%n diff --git a/tajo-common/src/test/java/log4j.properties b/tajo-common/src/test/java/log4j.properties index c1ac487c43..2c4d99161b 100644 --- a/tajo-common/src/test/java/log4j.properties +++ b/tajo-common/src/test/java/log4j.properties @@ -22,4 +22,4 @@ log4j.rootLogger=info,stdout log4j.threshhold=ALL log4j.appender.stdout=org.apache.log4j.ConsoleAppender log4j.appender.stdout.layout=org.apache.log4j.PatternLayout -log4j.appender.stdout.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n 
+log4j.appender.stdout.layout.ConversionPattern=%d{ISO8601} %p: %c (%M(%L)) - %m%n diff --git a/tajo-core/src/main/java/log4j.properties b/tajo-core/src/main/java/log4j.properties index 15e5778d2b..3f43f8f852 100644 --- a/tajo-core/src/main/java/log4j.properties +++ b/tajo-core/src/main/java/log4j.properties @@ -22,9 +22,9 @@ log4j.rootLogger=info,stdout,EventCounter log4j.threshhold=ALL log4j.appender.stdout=org.apache.log4j.ConsoleAppender log4j.appender.stdout.layout=org.apache.log4j.PatternLayout -log4j.appender.stdout.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n +log4j.appender.stdout.layout.ConversionPattern=%d{ISO8601} %p: %c (%M(%L)) - %m%n log4j.logger.org.apache.hadoop=WARN log4j.logger.org.apache.hadoop.conf=ERROR -log4j.appender.EventCounter=org.apache.tajo.util.metrics.TajoLogEventCounter \ No newline at end of file +log4j.appender.EventCounter=org.apache.tajo.util.metrics.TajoLogEventCounter diff --git a/tajo-core/src/main/resources/log4j.properties b/tajo-core/src/main/resources/log4j.properties index 007c8f53d5..772763e36b 100644 --- a/tajo-core/src/main/resources/log4j.properties +++ b/tajo-core/src/main/resources/log4j.properties @@ -22,7 +22,7 @@ log4j.rootLogger=info,stdout log4j.threshhold=INFO log4j.appender.stdout=org.apache.log4j.ConsoleAppender log4j.appender.stdout.layout=org.apache.log4j.PatternLayout -log4j.appender.stdout.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n +log4j.appender.stdout.layout.ConversionPattern=%d{ISO8601} %p: %c (%M(%L)) - %m%n log4j.logger.org.apache.hadoop=WARN -log4j.logger.org.apache.hadoop.conf=ERROR \ No newline at end of file +log4j.logger.org.apache.hadoop.conf=ERROR diff --git a/tajo-core/src/test/java/log4j.properties b/tajo-core/src/test/java/log4j.properties index 749124c306..48f9d8e609 100644 --- a/tajo-core/src/test/java/log4j.properties +++ b/tajo-core/src/test/java/log4j.properties @@ -22,7 +22,7 @@ log4j.rootLogger=info,stdout log4j.threshhold=ALL 
log4j.appender.stdout=org.apache.log4j.ConsoleAppender log4j.appender.stdout.layout=org.apache.log4j.PatternLayout -log4j.appender.stdout.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n +log4j.appender.stdout.layout.ConversionPattern=%d{ISO8601} %p: %c (%M(%L)) - %m%n log4j.logger.org.apache.hadoop=WARN -log4j.logger.org.apache.hadoop.conf=ERROR \ No newline at end of file +log4j.logger.org.apache.hadoop.conf=ERROR diff --git a/tajo-core/src/test/resources/log4j.properties b/tajo-core/src/test/resources/log4j.properties index 145703c82b..307608ecd2 100644 --- a/tajo-core/src/test/resources/log4j.properties +++ b/tajo-core/src/test/resources/log4j.properties @@ -22,7 +22,7 @@ log4j.rootLogger=info,stdout log4j.threshhold=INFO log4j.appender.stdout=org.apache.log4j.ConsoleAppender log4j.appender.stdout.layout=org.apache.log4j.PatternLayout -log4j.appender.stdout.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n +log4j.appender.stdout.layout.ConversionPattern=%d{ISO8601} %p: %c (%M(%L)) - %m%n -log4j.logger.org.apache.hadoop=WARN \ No newline at end of file +log4j.logger.org.apache.hadoop=WARN diff --git a/tajo-jdbc/src/main/resources/log4j.properties b/tajo-jdbc/src/main/resources/log4j.properties index 606f2d49dc..8f74c4993f 100644 --- a/tajo-jdbc/src/main/resources/log4j.properties +++ b/tajo-jdbc/src/main/resources/log4j.properties @@ -22,6 +22,6 @@ log4j.rootLogger=info,stdout log4j.threshhold=INFO log4j.appender.stdout=org.apache.log4j.ConsoleAppender log4j.appender.stdout.layout=org.apache.log4j.PatternLayout -log4j.appender.stdout.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n +log4j.appender.stdout.layout.ConversionPattern=%d{ISO8601} %p: %c (%M(%L)) - %m%n -log4j.logger.org.apache.tajo.jdbc=FATAL \ No newline at end of file +log4j.logger.org.apache.tajo.jdbc=FATAL diff --git a/tajo-rpc/src/test/java/log4j.properties b/tajo-rpc/src/test/java/log4j.properties index c1ac487c43..2c4d99161b 100644 --- 
a/tajo-rpc/src/test/java/log4j.properties +++ b/tajo-rpc/src/test/java/log4j.properties @@ -22,4 +22,4 @@ log4j.rootLogger=info,stdout log4j.threshhold=ALL log4j.appender.stdout=org.apache.log4j.ConsoleAppender log4j.appender.stdout.layout=org.apache.log4j.PatternLayout -log4j.appender.stdout.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n +log4j.appender.stdout.layout.ConversionPattern=%d{ISO8601} %p: %c (%M(%L)) - %m%n From 78b2a42d807c7f244ecab814d6a99513a01daf33 Mon Sep 17 00:00:00 2001 From: Hyunsik Choi Date: Mon, 21 Jul 2014 16:03:12 +0900 Subject: [PATCH 15/21] TAJO-965: Upgrade Bytes class and move some methods to others. --- .../main/java/org/apache/tajo/util/Bytes.java | 1184 ++++++++--------- .../java/org/apache/tajo/util/BytesUtils.java | 182 +++ .../java/org/apache/tajo/util/NumberUtil.java | 348 ++++- .../java/org/apache/tajo/util/TestBytes.java | 111 -- .../org/apache/tajo/util/TestNumberUtil.java | 47 +- .../org/apache/tajo/util/TestStringUtil.java | 40 +- .../apache/tajo/engine/eval/ExprTestBase.java | 6 +- .../java/org/apache/tajo/storage/CSVFile.java | 6 +- .../java/org/apache/tajo/storage/RowFile.java | 3 +- .../org/apache/tajo/storage/StorageUtil.java | 41 + .../storage/TextSerializerDeserializer.java | 7 +- .../tajo/storage/index/bst/BSTIndex.java | 16 +- .../apache/tajo/storage/rcfile/RCFile.java | 5 +- .../sequencefile/SequenceFileAppender.java | 8 +- .../sequencefile/SequenceFileScanner.java | 4 +- .../tajo/storage/v2/CSVFileScanner.java | 8 +- .../org/apache/tajo/storage/v2/RCFile.java | 6 +- .../apache/tajo/storage/TestLazyTuple.java | 6 +- 18 files changed, 1219 insertions(+), 809 deletions(-) create mode 100644 tajo-common/src/main/java/org/apache/tajo/util/BytesUtils.java delete mode 100644 tajo-common/src/test/java/org/apache/tajo/util/TestBytes.java diff --git a/tajo-common/src/main/java/org/apache/tajo/util/Bytes.java b/tajo-common/src/main/java/org/apache/tajo/util/Bytes.java index 448274aa08..405ec2fe1c 100644 
--- a/tajo-common/src/main/java/org/apache/tajo/util/Bytes.java +++ b/tajo-common/src/main/java/org/apache/tajo/util/Bytes.java @@ -21,30 +21,48 @@ import com.google.common.annotations.VisibleForTesting; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.io.RawComparator; import org.apache.hadoop.io.WritableComparator; import org.apache.hadoop.io.WritableUtils; import sun.misc.Unsafe; -import java.io.*; +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; import java.lang.reflect.Field; import java.math.BigDecimal; import java.math.BigInteger; import java.nio.ByteBuffer; import java.nio.ByteOrder; +import java.nio.charset.Charset; import java.security.AccessController; import java.security.PrivilegedAction; -import java.util.ArrayList; +import java.security.SecureRandom; +import java.util.Arrays; import java.util.Comparator; import java.util.Iterator; import java.util.List; +import static com.google.common.base.Preconditions.*; + /** * Utility class that handles byte arrays, conversions to/from other types, * comparisons, hash code generation, manufacturing keys for HashMaps or * HashSets, etc. 
*/ public class Bytes { + //HConstants.UTF8_ENCODING should be updated if this changed + /** When we encode strings, we always specify UTF8 encoding */ + private static final String UTF8_ENCODING = "UTF-8"; + + //HConstants.UTF8_CHARSET should be updated if this changed + /** When we encode strings, we always specify UTF8 encoding */ + private static final Charset UTF8_CHARSET = Charset.forName(UTF8_ENCODING); + + //HConstants.EMPTY_BYTE_ARRAY should be updated if this changed + private static final byte [] EMPTY_BYTE_ARRAY = new byte [0]; private static final Log LOG = LogFactory.getLog(Bytes.class); @@ -88,7 +106,6 @@ public class Bytes { */ public static final int SIZEOF_SHORT = Short.SIZE / Byte.SIZE; - /** * Estimate of size cost to pay beyond payload in jvm for instance of byte []. * Estimate based on study of jhat and jprofiler numbers. @@ -97,9 +114,21 @@ public class Bytes { // SizeOf which uses java.lang.instrument says 24 bytes. (3 longs?) public static final int ESTIMATED_HEAP_TAX = 16; + /** + * Returns length of the byte array, returning 0 if the array is null. + * Useful for calculating sizes. + * @param b byte array, which can be null + * @return 0 if b is null, otherwise returns length + */ + final public static int len(byte[] b) { + return b == null ? 0 : b.length; + } + /** * Byte array comparator class. */ + @InterfaceAudience.Public + @InterfaceStability.Stable public static class ByteArrayComparator implements RawComparator { /** * Constructor @@ -107,35 +136,65 @@ public static class ByteArrayComparator implements RawComparator { public ByteArrayComparator() { super(); } + @Override public int compare(byte [] left, byte [] right) { return compareTo(left, right); } + @Override public int compare(byte [] b1, int s1, int l1, byte [] b2, int s2, int l2) { return LexicographicalComparerHolder.BEST_COMPARER. 
- compareTo(b1, s1, l1, b2, s2, l2); + compareTo(b1, s1, l1, b2, s2, l2); + } + } + + /** + * A {@link ByteArrayComparator} that treats the empty array as the largest value. + * This is useful for comparing row end keys for regions. + */ + // TODO: unfortunately, HBase uses byte[0] as both start and end keys for region + // boundaries. Thus semantically, we should treat empty byte array as the smallest value + // while comparing row keys, start keys etc; but as the largest value for comparing + // region boundaries for endKeys. + @InterfaceAudience.Public + @InterfaceStability.Stable + public static class RowEndKeyComparator extends ByteArrayComparator { + @Override + public int compare(byte[] left, byte[] right) { + return compare(left, 0, left.length, right, 0, right.length); + } + @Override + public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) { + if (b1 == b2 && s1 == s2 && l1 == l2) { + return 0; + } + if (l1 == 0) { + return l2; //0 or positive + } + if (l2 == 0) { + return -1; + } + return super.compare(b1, s1, l1, b2, s2, l2); } } /** * Pass this to TreeMaps where byte [] are keys. */ - public static Comparator BYTES_COMPARATOR = - new ByteArrayComparator(); + public final static Comparator BYTES_COMPARATOR = new ByteArrayComparator(); /** * Use comparing byte arrays, byte-by-byte */ - public static RawComparator BYTES_RAWCOMPARATOR = - new ByteArrayComparator(); + public final static RawComparator BYTES_RAWCOMPARATOR = new ByteArrayComparator(); /** * Read byte-array written with a WritableableUtils.vint prefix. * @param in Input to read from. 
* @return byte array read off in - * @throws java.io.IOException e + * @throws IOException e */ public static byte [] readByteArray(final DataInput in) - throws IOException { + throws IOException { int len = WritableUtils.readVInt(in); if (len < 0) { throw new NegativeArraySizeException(Integer.toString(len)); @@ -163,10 +222,10 @@ public int compare(byte [] b1, int s1, int l1, byte [] b2, int s2, int l2) { * Write byte-array with a WritableableUtils.vint prefix. * @param out output stream to be written to * @param b array to write - * @throws java.io.IOException e + * @throws IOException e */ public static void writeByteArray(final DataOutput out, final byte [] b) - throws IOException { + throws IOException { if(b == null) { WritableUtils.writeVInt(out, 0); } else { @@ -180,11 +239,11 @@ public static void writeByteArray(final DataOutput out, final byte [] b) * @param b array * @param offset offset into array * @param length length past offset - * @throws java.io.IOException e + * @throws IOException e */ public static void writeByteArray(final DataOutput out, final byte [] b, - final int offset, final int length) - throws IOException { + final int offset, final int length) + throws IOException { WritableUtils.writeVInt(out, length); out.write(b, offset, length); } @@ -199,7 +258,7 @@ public static void writeByteArray(final DataOutput out, final byte [] b, * @return New offset in src array. 
*/ public static int writeByteArray(final byte [] tgt, final int tgtOffset, - final byte [] src, final int srcOffset, final int srcLength) { + final byte [] src, final int srcOffset, final int srcLength) { byte [] vint = vintToBytes(srcLength); System.arraycopy(vint, 0, tgt, tgtOffset, vint.length); int offset = tgtOffset + vint.length; @@ -207,11 +266,6 @@ public static int writeByteArray(final byte [] tgt, final int tgtOffset, return offset + srcLength; } - public static void writeVLong(ByteArrayOutputStream byteStream, long l) { - byte[] vLongBytes = Bytes.vlongToBytes(l); - byteStream.write(vLongBytes, 0, vLongBytes.length); - } - /** * Put bytes at the specified byte array position. * @param tgtBytes the byte array @@ -222,7 +276,7 @@ public static void writeVLong(ByteArrayOutputStream byteStream, long l) { * @return incremented offset */ public static int putBytes(byte[] tgtBytes, int tgtOffset, byte[] srcBytes, - int srcOffset, int srcLength) { + int srcOffset, int srcLength) { System.arraycopy(srcBytes, srcOffset, tgtBytes, tgtOffset, srcLength); return tgtOffset + srcLength; } @@ -240,14 +294,37 @@ public static int putByte(byte[] bytes, int offset, byte b) { } /** - * Returns a new byte array, copied from the passed ByteBuffer. - * @param bb A ByteBuffer + * Add the whole content of the ByteBuffer to the bytes arrays. The ByteBuffer is modified. + * @param bytes the byte array + * @param offset position in the array + * @param buf ByteBuffer to write out + * @return incremented offset + */ + public static int putByteBuffer(byte[] bytes, int offset, ByteBuffer buf) { + int len = buf.remaining(); + buf.get(bytes, offset, len); + return offset + len; + } + + /** + * Returns a new byte array, copied from the given {@code buf}, + * from the index 0 (inclusive) to the limit (exclusive), + * regardless of the current position. + * The position and the other index parameters are not changed. 
+ * + * @param buf a byte buffer * @return the byte array + * @see #getBytes(ByteBuffer) */ - public static byte[] toBytes(ByteBuffer bb) { - int length = bb.limit(); - byte [] result = new byte[length]; - System.arraycopy(bb.array(), bb.arrayOffset(), result, 0, length); + public static byte[] toBytes(ByteBuffer buf) { + ByteBuffer dup = buf.duplicate(); + dup.position(0); + return readBytes(dup); + } + + private static byte[] readBytes(ByteBuffer buf) { + byte [] result = new byte[buf.remaining()]; + buf.get(result); return result; } @@ -276,8 +353,7 @@ public static String toString(final byte [] b1, /** * This method will convert utf8 encoded bytes into a string. If - * an UnsupportedEncodingException occurs, this method will eat it - * and return null instead. + * the given byte array is null, this method will return null. * * @param b Presumed UTF-8 encoded byte array. * @param off offset into array @@ -291,12 +367,7 @@ public static String toString(final byte [] b, int off, int len) { if (len == 0) { return ""; } - try { - return new String(b, off, len, "UTF-8"); - } catch (UnsupportedEncodingException e) { - LOG.error("UTF-8 not supported?", e); - return null; - } + return new String(b, off, len, UTF8_CHARSET); } /** @@ -311,18 +382,25 @@ public static String toStringBinary(final byte [] b) { return "null"; return toStringBinary(b, 0, b.length); } - + /** - * Converts the given byte buffer, from its array offset to its limit, to - * a string. The position and the mark are ignored. + * Converts the given byte buffer to a printable representation, + * from the index 0 (inclusive) to the limit (exclusive), + * regardless of the current position. + * The position and the other index parameters are not changed. 
* * @param buf a byte buffer * @return a string representation of the buffer's binary contents + * @see #toBytes(ByteBuffer) + * @see #getBytes(ByteBuffer) */ public static String toStringBinary(ByteBuffer buf) { if (buf == null) return "null"; - return toStringBinary(buf.array(), buf.arrayOffset(), buf.limit()); + if (buf.hasArray()) { + return toStringBinary(buf.array(), buf.arrayOffset(), buf.limit()); + } + return toStringBinary(toBytes(buf)); } /** @@ -337,21 +415,19 @@ public static String toStringBinary(ByteBuffer buf) { */ public static String toStringBinary(final byte [] b, int off, int len) { StringBuilder result = new StringBuilder(); - try { - String first = new String(b, off, len, "ISO-8859-1"); - for (int i = 0; i < first.length() ; ++i ) { - int ch = first.charAt(i) & 0xFF; - if ( (ch >= '0' && ch <= '9') - || (ch >= 'A' && ch <= 'Z') - || (ch >= 'a' && ch <= 'z') - || " `~!@#$%^&*()-_=+[]{}\\|;:'\",.<>/?".indexOf(ch) >= 0 ) { - result.append(first.charAt(i)); - } else { - result.append(String.format("\\x%02X", ch)); - } + // Just in case we are passed a 'len' that is > buffer length... 
+ if (off >= b.length) return result.toString(); + if (off + len > b.length) len = b.length - off; + for (int i = off; i < off + len ; ++i ) { + int ch = b[i] & 0xFF; + if ( (ch >= '0' && ch <= '9') + || (ch >= 'A' && ch <= 'Z') + || (ch >= 'a' && ch <= 'z') + || " `~!@#$%^&*()-_=+[]{}|;:'\",.<>/?".indexOf(ch) >= 0 ) { + result.append((char)ch); + } else { + result.append(String.format("\\x%02X", ch)); } - } catch (UnsupportedEncodingException e) { - LOG.error("ISO-8859-1 not supported?", e); } return result.toString(); } @@ -359,7 +435,7 @@ public static String toStringBinary(final byte [] b, int off, int len) { private static boolean isHexDigit(char c) { return (c >= 'A' && c <= 'F') || - (c >= '0' && c <= '9'); + (c >= '0' && c <= '9'); } /** @@ -376,19 +452,12 @@ public static byte toBinaryFromHex(byte ch) { } public static byte [] toBytesBinary(String in) { - // this may be bigger than we need, but lets be safe. + // this may be bigger than we need, but let's be safe. byte [] b = new byte[in.length()]; int size = 0; for (int i = 0; i < in.length(); ++i) { char ch = in.charAt(i); - if (ch == '\\') { - // begin hex escape: - char next = in.charAt(i+1); - if (next != 'x') { - // invalid escape sequence, ignore this one. - b[size++] = (byte)ch; - continue; - } + if (ch == '\\' && in.length() > i+1 && in.charAt(i+1) == 'x') { // ok, take next 2 hex digits. char hd1 = in.charAt(i+2); char hd2 = in.charAt(i+3); @@ -420,26 +489,7 @@ public static byte toBinaryFromHex(byte ch) { * @return the byte array */ public static byte[] toBytes(String s) { - try { - return s.getBytes("UTF-8"); - } catch (UnsupportedEncodingException e) { - LOG.error("UTF-8 not supported?", e); - return null; - } - } - - /** - * Converts a char array to a ascii byte array. 
- * - * @param chars string - * @return the byte array - */ - public static byte[] toASCIIBytes(char[] chars) { - byte[] buffer = new byte[chars.length]; - for (int i = 0; i < chars.length; i++) { - buffer[i] = (byte) chars[i]; - } - return buffer; + return s.getBytes(UTF8_CHARSET); } /** @@ -526,16 +576,16 @@ public static long toLong(byte[] bytes, int offset, final int length) { } private static IllegalArgumentException - explainWrongLengthOrOffset(final byte[] bytes, - final int offset, - final int length, - final int expectedLength) { + explainWrongLengthOrOffset(final byte[] bytes, + final int offset, + final int length, + final int expectedLength) { String reason; if (length != expectedLength) { reason = "Wrong length: " + length + ", expected " + expectedLength; } else { - reason = "offset (" + offset + ") + length (" + length + ") exceed the" - + " capacity of the array: " + bytes.length; + reason = "offset (" + offset + ") + length (" + length + ") exceed the" + + " capacity of the array: " + bytes.length; } return new IllegalArgumentException(reason); } @@ -640,7 +690,9 @@ public static int putDouble(byte [] bytes, int offset, double d) { } /** - * Convert an int value to a byte array + * Convert an int value to a byte array. Big-endian. Same as what DataOutputStream.writeInt + * does. + * * @param val value * @return the byte array */ @@ -694,6 +746,28 @@ public static int toInt(byte[] bytes, int offset, final int length) { return n; } + /** + * Converts a byte array to an int value + * @param bytes byte array + * @param offset offset into array + * @param length how many bytes should be considered for creating int + * @return the int value + * @throws IllegalArgumentException if there's not enough room in the array at the offset + * indicated. 
+ */ + public static int readAsInt(byte[] bytes, int offset, final int length) { + if (offset + length > bytes.length) { + throw new IllegalArgumentException("offset (" + offset + ") + length (" + length + + ") exceed the" + " capacity of the array: " + bytes.length); + } + int n = 0; + for(int i = offset; i < (offset + length); i++) { + n <<= 8; + n ^= bytes[i] & 0xFF; + } + return n; + } + /** * Put an int value out to the specified byte array position. * @param bytes the byte array @@ -769,17 +843,16 @@ public static short toShort(byte[] bytes, int offset, final int length) { } /** - * This method will get a sequence of bytes from pos -> limit, - * but will restore pos after. - * @param buf - * @return + * Returns a new byte array, copied from the given {@code buf}, + * from the position (inclusive) to the limit (exclusive). + * The position and the other index parameters are not changed. + * + * @param buf a byte buffer + * @return the byte array + * @see #toBytes(ByteBuffer) */ public static byte[] getBytes(ByteBuffer buf) { - int savedPos = buf.position(); - byte [] newBytes = new byte[buf.remaining()]; - buf.get(newBytes); - buf.position(savedPos); - return newBytes; + return readBytes(buf.duplicate()); } /** @@ -802,6 +875,29 @@ public static int putShort(byte[] bytes, int offset, short val) { return offset + SIZEOF_SHORT; } + /** + * Put an int value as short out to the specified byte array position. Only the lower 2 bytes of + * the short will be put into the array. The caller of the API need to make sure they will not + * loose the value by doing so. This is useful to store an unsigned short which is represented as + * int in other parts. + * @param bytes the byte array + * @param offset position in the array + * @param val value to write out + * @return incremented offset + * @throws IllegalArgumentException if the byte array given doesn't have + * enough room at the offset specified. 
+ */ + public static int putAsShort(byte[] bytes, int offset, int val) { + if (bytes.length - offset < SIZEOF_SHORT) { + throw new IllegalArgumentException("Not enough room to put a short at" + + " offset " + offset + " in a " + bytes.length + " byte array"); + } + bytes[offset+1] = (byte) val; + val >>= 8; + bytes[offset] = (byte) val; + return offset + SIZEOF_SHORT; + } + /** * Convert a BigDecimal value to a byte array * @@ -827,17 +923,6 @@ public static BigDecimal toBigDecimal(byte[] bytes) { return toBigDecimal(bytes, 0, bytes.length); } - /** - * Converts a byte array to a BigDecimal value - * - * @param bytes - * @param offset - * @return the char value - */ - public static BigDecimal toBigDecimal(byte[] bytes, int offset) { - return toBigDecimal(bytes, offset, bytes.length); - } - /** * Converts a byte array to a BigDecimal value * @@ -848,13 +933,13 @@ public static BigDecimal toBigDecimal(byte[] bytes, int offset) { */ public static BigDecimal toBigDecimal(byte[] bytes, int offset, final int length) { if (bytes == null || length < SIZEOF_INT + 1 || - (offset + length > bytes.length)) { + (offset + length > bytes.length)) { return null; } - int scale = toInt(bytes, 0); + int scale = toInt(bytes, offset); byte[] tcBytes = new byte[length - SIZEOF_INT]; - System.arraycopy(bytes, SIZEOF_INT, tcBytes, 0, length - SIZEOF_INT); + System.arraycopy(bytes, offset + SIZEOF_INT, tcBytes, 0, length - SIZEOF_INT); return new BigDecimal(new BigInteger(tcBytes), scale); } @@ -876,7 +961,7 @@ public static int putBigDecimal(byte[] bytes, int offset, BigDecimal val) { offset = putInt(result, offset, val.scale()); return putBytes(result, offset, valueBytes, 0, valueBytes.length); } - + /** * @param vint Integer to make a vint of. * @return Vint as bytes array. @@ -915,45 +1000,6 @@ public static int putBigDecimal(byte[] bytes, int offset, BigDecimal val) { return result; } - /** - * @param n Long to make a VLong of. - * @return VLong as bytes array. 
- */ - public static byte[] vlongToBytes(long n) { - byte [] result; - int offset = 0; - if (n >= -112 && n <= 127) { - result = new byte[1]; - result[offset] = (byte) n; - return result; - } - - int len = -112; - if (n < 0) { - n ^= -1L; // take one's complement' - len = -120; - } - - long tmp = n; - while (tmp != 0) { - tmp = tmp >> 8; - len--; - } - - int size = WritableUtils.decodeVIntSize((byte)len); - - result = new byte[size]; - result[offset++] = (byte) len; - len = (len < -120) ? -(len + 120) : -(len + 112); - - for (int idx = len; idx != 0; idx--) { - int shiftbits = (idx - 1) * 8; - long mask = 0xFFL << shiftbits; - result[offset++] = (byte)((n & mask) >> shiftbits); - } - return result; - } - /** * @param buffer buffer to convert * @return vint bytes as an integer. @@ -982,47 +1028,19 @@ public static long bytesToVint(final byte [] buffer) { * @return deserialized long from stream. */ public static long readVLong(final byte [] buffer, final int offset) - throws IOException { - byte firstByte = buffer[offset]; - int length = (byte) WritableUtils.decodeVIntSize(firstByte); - if (length == 1) { - return firstByte; - } - long i = 0; - for (int idx = 0; idx < length - 1; idx++) { - byte b = buffer[offset + 1 + idx]; - i = i << 8; - i = i | (b & 0xFF); - } - return (WritableUtils.isNegativeVInt(firstByte) ? (i ^ -1L) : i); - } - - /** - * Reads a zero-compressed encoded int from input stream and returns it. - * @param buffer Binary array - * @param offset Offset into array at which vint begins. - * @throws java.io.IOException e - * @return deserialized long from stream. 
- */ - public static int readVInt(final byte [] buffer, final int offset) throws IOException { byte firstByte = buffer[offset]; - int length = (byte) WritableUtils.decodeVIntSize(firstByte); - if (length == 1) { + int len = WritableUtils.decodeVIntSize(firstByte); + if (len == 1) { return firstByte; } - int i = 0; - for (int idx = 0; idx < length - 1; idx++) { + long i = 0; + for (int idx = 0; idx < len-1; idx++) { byte b = buffer[offset + 1 + idx]; i = i << 8; i = i | (b & 0xFF); } - return (WritableUtils.isNegativeVInt(firstByte) ? (i ^ -1) : i); - } - - public static byte getVIntSize(byte[] bytes, int offset) { - byte firstByte = bytes[offset]; - return (byte) WritableUtils.decodeVIntSize(firstByte); + return (WritableUtils.isNegativeVInt(firstByte) ? ~i : i); } /** @@ -1032,7 +1050,7 @@ public static byte getVIntSize(byte[] bytes, int offset) { */ public static int compareTo(final byte [] left, final byte [] right) { return LexicographicalComparerHolder.BEST_COMPARER. - compareTo(left, 0, left.length, right, 0, right.length); + compareTo(left, 0, left.length, right, 0, right.length); } /** @@ -1047,20 +1065,15 @@ public static int compareTo(final byte [] left, final byte [] right) { * @return 0 if equal, < 0 if left is less than right, etc. */ public static int compareTo(byte[] buffer1, int offset1, int length1, - byte[] buffer2, int offset2, int length2) { + byte[] buffer2, int offset2, int length2) { return LexicographicalComparerHolder.BEST_COMPARER. - compareTo(buffer1, offset1, length1, buffer2, offset2, length2); + compareTo(buffer1, offset1, length1, buffer2, offset2, length2); } - - /** - * The number of bytes required to represent a primitive {@code long} - * value. 
- */ - public static final int LONG_BYTES = Long.SIZE / Byte.SIZE; - + interface Comparer { - abstract public int compareTo(T buffer1, int offset1, int length1, - T buffer2, int offset2, int length2); + int compareTo( + T buffer1, int offset1, int length1, T buffer2, int offset2, int length2 + ); } @VisibleForTesting @@ -1070,7 +1083,7 @@ static Comparer lexicographicalComparerJavaImpl() { /** * Provides a lexicographical comparer implementation; either a Java - * implementation or a faster implementation based on {@link sun.misc.Unsafe}. + * implementation or a faster implementation based on {@link Unsafe}. * *

Uses reflection to gracefully fall back to the Java implementation if * {@code Unsafe} isn't available. @@ -1079,7 +1092,7 @@ static Comparer lexicographicalComparerJavaImpl() { static class LexicographicalComparerHolder { static final String UNSAFE_COMPARER_NAME = LexicographicalComparerHolder.class.getName() + "$UnsafeComparer"; - + static final Comparer BEST_COMPARER = getBestComparer(); /** * Returns the Unsafe-using Comparer, or falls back to the pure-Java @@ -1092,19 +1105,19 @@ static Comparer getBestComparer() { // yes, UnsafeComparer does implement Comparer @SuppressWarnings("unchecked") Comparer comparer = - (Comparer) theClass.getEnumConstants()[0]; + (Comparer) theClass.getEnumConstants()[0]; return comparer; } catch (Throwable t) { // ensure we really catch *everything* return lexicographicalComparerJavaImpl(); } } - + enum PureJavaComparer implements Comparer { INSTANCE; @Override public int compareTo(byte[] buffer1, int offset1, int length1, - byte[] buffer2, int offset2, int length2) { + byte[] buffer2, int offset2, int length2) { // Short circuit equal case if (buffer1 == buffer2 && offset1 == offset2 && @@ -1162,7 +1175,7 @@ public Object run() { } static final boolean littleEndian = - ByteOrder.nativeOrder().equals(ByteOrder.LITTLE_ENDIAN); + ByteOrder.nativeOrder().equals(ByteOrder.LITTLE_ENDIAN); /** * Returns true if x1 is less than x2, when both values are treated as @@ -1185,7 +1198,7 @@ static boolean lessThanUnsigned(long x1, long x2) { */ @Override public int compareTo(byte[] buffer1, int offset1, int length1, - byte[] buffer2, int offset2, int length2) { + byte[] buffer2, int offset2, int length2) { // Short circuit equal case if (buffer1 == buffer2 && offset1 == offset2 && @@ -1193,7 +1206,7 @@ public int compareTo(byte[] buffer1, int offset1, int length1, return 0; } int minLength = Math.min(length1, length2); - int minWords = minLength / LONG_BYTES; + int minWords = minLength / SIZEOF_LONG; int offset1Adj = offset1 + 
BYTE_ARRAY_BASE_OFFSET; int offset2Adj = offset2 + BYTE_ARRAY_BASE_OFFSET; @@ -1202,7 +1215,7 @@ public int compareTo(byte[] buffer1, int offset1, int length1, * time is no slower than comparing 4 bytes at a time even on 32-bit. * On the other hand, it is substantially faster on 64-bit. */ - for (int i = 0; i < minWords * LONG_BYTES; i += LONG_BYTES) { + for (int i = 0; i < minWords * SIZEOF_LONG; i += SIZEOF_LONG) { long lw = theUnsafe.getLong(buffer1, offset1Adj + (long) i); long rw = theUnsafe.getLong(buffer2, offset2Adj + (long) i); long diff = lw ^ rw; @@ -1237,7 +1250,7 @@ public int compareTo(byte[] buffer1, int offset1, int length1, } // The epilogue to cover the last (minLength % 8) elements. - for (int i = minWords * LONG_BYTES; i < minLength; i++) { + for (int i = minWords * SIZEOF_LONG; i < minLength; i++) { int a = (buffer1[offset1 + i] & 0xff); int b = (buffer2[offset2 + i] & 0xff); if (a != b) { @@ -1261,7 +1274,7 @@ public static boolean equals(final byte [] left, final byte [] right) { if (left == null || right == null) return false; if (left.length != right.length) return false; if (left.length == 0) return true; - + // Since we're often comparing adjacent sorted data, // it's usual to have equal arrays except for the very last byte // so check that first @@ -1269,7 +1282,7 @@ public static boolean equals(final byte [] left, final byte [] right) { return compareTo(left, right) == 0; } - + public static boolean equals(final byte[] left, int leftOffset, int leftLen, final byte[] right, int rightOffset, int rightLen) { // short circuit case @@ -1285,16 +1298,37 @@ public static boolean equals(final byte[] left, int leftOffset, int leftLen, if (leftLen == 0) { return true; } - + // Since we're often comparing adjacent sorted data, // it's usual to have equal arrays except for the very last byte // so check that first if (left[leftOffset + leftLen - 1] != right[rightOffset + rightLen - 1]) return false; return 
LexicographicalComparerHolder.BEST_COMPARER. - compareTo(left, leftOffset, leftLen, right, rightOffset, rightLen) == 0; + compareTo(left, leftOffset, leftLen, right, rightOffset, rightLen) == 0; } - + + + /** + * @param a left operand + * @param buf right operand + * @return True if equal + */ + public static boolean equals(byte[] a, ByteBuffer buf) { + if (a == null) return buf == null; + if (buf == null) return false; + if (a.length != buf.remaining()) return false; + + // Thou shalt not modify the original byte buffer in what should be read only operations. + ByteBuffer b = buf.duplicate(); + for (byte anA : a) { + if (anA != b.get()) { + return false; + } + } + return true; + } + /** * Return true if the byte array on the right is a prefix of the byte @@ -1302,15 +1336,16 @@ public static boolean equals(final byte[] left, int leftOffset, int leftLen, */ public static boolean startsWith(byte[] bytes, byte[] prefix) { return bytes != null && prefix != null && - bytes.length >= prefix.length && - LexicographicalComparerHolder.BEST_COMPARER. - compareTo(bytes, 0, prefix.length, prefix, 0, prefix.length) == 0; + bytes.length >= prefix.length && + LexicographicalComparerHolder.BEST_COMPARER. + compareTo(bytes, 0, prefix.length, prefix, 0, prefix.length) == 0; } /** * @param b bytes to hash - * @return Runs {@link org.apache.hadoop.io.WritableComparator#hashBytes(byte[], int)} on the + * @return Runs {@link WritableComparator#hashBytes(byte[], int)} on the * passed in array. This method is what {@link org.apache.hadoop.io.Text} and + * {@link ImmutableBytesWritable} use calculating hash code. 
*/ public static int hashCode(final byte [] b) { return hashCode(b, b.length); @@ -1319,8 +1354,9 @@ public static int hashCode(final byte [] b) { /** * @param b value * @param length length of the value - * @return Runs {@link org.apache.hadoop.io.WritableComparator#hashBytes(byte[], int)} on the + * @return Runs {@link WritableComparator#hashBytes(byte[], int)} on the * passed in array. This method is what {@link org.apache.hadoop.io.Text} and + * {@link ImmutableBytesWritable} use calculating hash code. */ public static int hashCode(final byte [] b, final int length) { return WritableComparator.hashBytes(b, length); @@ -1351,7 +1387,7 @@ public static Integer mapKey(final byte [] b, final int length) { * @return New array that has a in lower half and b in upper half. */ public static byte [] add(final byte [] a, final byte [] b) { - return add(a, b, new byte [0]); + return add(a, b, EMPTY_BYTE_ARRAY); } /** @@ -1432,114 +1468,49 @@ public static Integer mapKey(final byte [] b, final int length) { * @return Array of dividing values */ public static byte [][] split(final byte [] a, final byte [] b, final int num) { - byte[][] ret = new byte[num+2][]; + return split(a, b, false, num); + } + + /** + * Split passed range. Expensive operation relatively. Uses BigInteger math. + * Useful splitting ranges for MapReduce jobs. + * @param a Beginning of range + * @param b End of range + * @param inclusive Whether the end of range is prefix-inclusive or is + * considered an exclusive boundary. Automatic splits are generally exclusive + * and manual splits with an explicit range utilize an inclusive end of range. + * @param num Number of times to split range. Pass 1 if you want to split + * the range in two; i.e. one split. 
+ * @return Array of dividing values + */ + public static byte[][] split(final byte[] a, final byte[] b, + boolean inclusive, final int num) { + byte[][] ret = new byte[num + 2][]; int i = 0; - Iterable iter = iterateOnSplits(a, b, num); - if (iter == null) return null; + Iterable iter = iterateOnSplits(a, b, inclusive, num); + if (iter == null) + return null; for (byte[] elem : iter) { ret[i++] = elem; } return ret; } - public static byte[][] splitPreserveAllTokens(byte[] str, char separatorChar, int[] target) { - return splitWorker(str, 0, -1, separatorChar, true, target); - } - - public static byte[][] splitPreserveAllTokens(byte[] str, int offset, int length, char separatorChar, int[] target) { - return splitWorker(str, offset, length, separatorChar, true, target); - } - - public static byte[][] splitPreserveAllTokens(byte[] str, char separatorChar) { - return splitWorker(str, 0, -1, separatorChar, true, null); - } - - public static byte[][] splitPreserveAllTokens(byte[] str, int length, char separatorChar) { - return splitWorker(str, 0, length, separatorChar, true, null); - } - /** - * Performs the logic for the split and - * splitPreserveAllTokens methods that do not return a - * maximum array length. - * - * @param str the String to parse, may be null - * @param length amount of bytes to str - * @param separatorChar the ascii separate character - * @param preserveAllTokens if true, adjacent separators are - * @param target the projection target - * treated as empty token separators; if false, adjacent - * separators are treated as one separator. 
- * @return an array of parsed Strings, null if null String input - */ - private static byte[][] splitWorker(byte[] str, int offset, int length, char separatorChar, boolean preserveAllTokens, int[] target) { - // Performance tuned for 2.0 (JDK1.4) - - if (str == null) { - return null; - } - int len = length; - if (len == 0) { - return new byte[1][0]; - }else if(len < 0){ - len = str.length - offset; - } - - List list = new ArrayList(); - int i = 0, start = 0; - boolean match = false; - boolean lastMatch = false; - int currentTarget = 0; - int currentIndex = 0; - while (i < len) { - if (str[i + offset] == separatorChar) { - if (match || preserveAllTokens) { - if (target == null) { - byte[] bytes = new byte[i - start]; - System.arraycopy(str, start + offset, bytes, 0, bytes.length); - list.add(bytes); - } else if (target.length > currentTarget && currentIndex == target[currentTarget]) { - byte[] bytes = new byte[i - start]; - System.arraycopy(str, start + offset, bytes, 0, bytes.length); - list.add(bytes); - currentTarget++; - } else { - list.add(null); - } - currentIndex++; - match = false; - lastMatch = true; - } - start = ++i; - continue; - } - lastMatch = false; - match = true; - i++; - } - if (match || (preserveAllTokens && lastMatch)) { - if (target == null) { - byte[] bytes = new byte[i - start]; - System.arraycopy(str, start + offset, bytes, 0, bytes.length); - list.add(bytes); - } else if (target.length > currentTarget && currentIndex == target[currentTarget]) { - byte[] bytes = new byte[i - start]; - System.arraycopy(str, start + offset, bytes, 0, bytes.length); - list.add(bytes); //str.substring(start, i)); - currentTarget++; - } else { - list.add(null); - } - currentIndex++; - } - return (byte[][]) list.toArray(new byte[list.size()][]); + * Iterate over keys within the passed range, splitting at an [a,b) boundary. 
+ */ + public static Iterable iterateOnSplits(final byte[] a, + final byte[] b, final int num) + { + return iterateOnSplits(a, b, false, num); } + /** - * Iterate over keys within the passed inclusive range. + * Iterate over keys within the passed range. */ public static Iterable iterateOnSplits( - final byte[] a, final byte[]b, final int num) - { + final byte[] a, final byte[]b, boolean inclusive, final int num) + { byte [] aPadded; byte [] bPadded; if (a.length < b.length) { @@ -1556,12 +1527,15 @@ public static Iterable iterateOnSplits( throw new IllegalArgumentException("b <= a"); } if (num <= 0) { - throw new IllegalArgumentException("num cannot be < 0"); + throw new IllegalArgumentException("num cannot be <= 0"); } byte [] prependHeader = {1, 0}; final BigInteger startBI = new BigInteger(add(prependHeader, aPadded)); final BigInteger stopBI = new BigInteger(add(prependHeader, bPadded)); - final BigInteger diffBI = stopBI.subtract(startBI); + BigInteger diffBI = stopBI.subtract(startBI); + if (inclusive) { + diffBI = diffBI.add(BigInteger.ONE); + } final BigInteger splitsBI = BigInteger.valueOf(num + 1); if(diffBI.compareTo(splitsBI) < 0) { return null; @@ -1576,7 +1550,7 @@ public static Iterable iterateOnSplits( final Iterator iterator = new Iterator() { private int i = -1; - + @Override public boolean hasNext() { return i < num+1; @@ -1587,7 +1561,7 @@ public byte[] next() { i++; if (i == 0) return a; if (i == num + 1) return b; - + BigInteger curBI = startBI.add(intervalBI.multiply(BigInteger.valueOf(i))); byte [] padded = curBI.toByteArray(); if (padded[1] == 0) @@ -1601,9 +1575,9 @@ public byte[] next() { public void remove() { throw new UnsupportedOperationException(); } - + }; - + return new Iterable() { @Override public Iterator iterator() { @@ -1612,6 +1586,18 @@ public Iterator iterator() { }; } + /** + * @param bytes array to hash + * @param offset offset to start from + * @param length length to hash + * */ + public static int hashCode(byte[] 
bytes, int offset, int length) { + int hash = 1; + for (int i = offset; i < offset + length; i++) + hash = (31 * hash) + (int) bytes[i]; + return hash; + } + /** * @param t operands * @return Array of byte arrays made from passed array of Text @@ -1660,7 +1646,7 @@ public Iterator iterator() { * ranging from -(N + 1) to N - 1. */ public static int binarySearch(byte [][]arr, byte []key, int offset, - int length, RawComparator comparator) { + int length, RawComparator comparator) { int low = 0; int high = arr.length - 1; @@ -1673,10 +1659,10 @@ public static int binarySearch(byte [][]arr, byte []key, int offset, // key lives above the midpoint if (cmp > 0) low = mid + 1; - // key lives below the midpoint + // key lives below the midpoint else if (cmp < 0) high = mid - 1; - // BAM. how often does this really happen? + // BAM. how often does this really happen? else return mid; } @@ -1703,11 +1689,11 @@ else if (cmp < 0) newvalue = new byte[SIZEOF_LONG]; } System.arraycopy(val, 0, newvalue, newvalue.length - val.length, - val.length); + val.length); val = newvalue; } else if (val.length > SIZEOF_LONG) { throw new IllegalArgumentException("Increment Bytes - value too big: " + - val.length); + val.length); } if(amount == 0) return val; if(val[0] < 0){ @@ -1770,7 +1756,7 @@ else if (cmp < 0) * Writes a string as a fixed-size field, padded with zeros. */ public static void writeStringFixedSize(final DataOutput out, String s, - int size) throws IOException { + int size) throws IOException { byte[] b = toBytes(s); if (b.length > size) { throw new IOException("Trying to write " + b.length + " bytes (" + @@ -1785,7 +1771,7 @@ public static void writeStringFixedSize(final DataOutput out, String s, /** * Reads a fixed-size field and interprets it as a string padded with zeros. 
*/ - public static String readStringFixedSize(final DataInput in, int size) + public static String readStringFixedSize(final DataInput in, int size) throws IOException { byte[] b = new byte[size]; in.readFully(b); @@ -1796,377 +1782,279 @@ public static String readStringFixedSize(final DataInput in, int size) return toString(b, 0, n); } - public static int readFully(InputStream is, byte[] buffer, int offset, int length) - throws IOException { - int nread = 0; - while (nread < length) { - int nbytes = is.read(buffer, offset + nread, length - nread); - if (nbytes < 0) { - return nread > 0 ? nread : nbytes; - } - nread += nbytes; - } - return nread; + /** + * Copy the byte array given in parameter and return an instance + * of a new byte array with the same length and the same content. + * @param bytes the byte array to duplicate + * @return a copy of the given byte array + */ + public static byte [] copy(byte [] bytes) { + if (bytes == null) return null; + byte [] result = new byte[bytes.length]; + System.arraycopy(bytes, 0, result, 0, bytes.length); + return result; } /** - * Similar to readFully(). Skips bytes in a loop. - * @param in The DataInput to skip bytes from - * @param len number of bytes to skip. - * @throws IOException if it could not skip requested number of bytes - * for any reason (including EOF) + * Copy the byte array given in parameter and return an instance + * of a new byte array with the same length and the same content. + * @param bytes the byte array to copy from + * @return a copy of the given designated byte array + * @param offset + * @param length */ - public static void skipFully(DataInput in, int len) throws IOException { - int amt = len; - while (amt > 0) { - long ret = in.skipBytes(amt); - if (ret == 0) { - // skip may return 0 even if we're not at EOF. Luckily, we can - // use the read() method to figure out if we're at the end. 
- int b = in.readByte(); - if (b == -1) { - throw new EOFException( "Premature EOF from inputStream after " + - "skipping " + (len - amt) + " byte(s)."); - } - ret = 1; - } - amt -= ret; - } + public static byte [] copy(byte [] bytes, final int offset, final int length) { + if (bytes == null) return null; + byte [] result = new byte[length]; + System.arraycopy(bytes, offset, result, 0, length); + return result; } /** - * Parses the byte array argument as if it was an int value and returns the - * result. Throws NumberFormatException if the byte array does not represent an - * int quantity. - * - * @return int the value represented by the argument - * @throws NumberFormatException if the argument could not be parsed as an int quantity. + * Search sorted array "a" for byte "key". I can't remember if I wrote this or copied it from + * somewhere. (mcorgan) + * @param a Array to search. Entries must be sorted and unique. + * @param fromIndex First index inclusive of "a" to include in the search. + * @param toIndex Last index exclusive of "a" to include in the search. + * @param key The byte to search for. + * @return The index of key if found. If not found, return -(index + 1), where negative indicates + * "not found" and the "index + 1" handles the "-0" case. */ - public static int parseInt(byte[] bytes, int start, int length) { - return parseInt(bytes, start, length, 10); + public static int unsignedBinarySearch(byte[] a, int fromIndex, int toIndex, byte key) { + int unsignedKey = key & 0xff; + int low = fromIndex; + int high = toIndex - 1; + + while (low <= high) { + int mid = (low + high) >>> 1; + int midVal = a[mid] & 0xff; + + if (midVal < unsignedKey) { + low = mid + 1; + } else if (midVal > unsignedKey) { + high = mid - 1; + } else { + return mid; // key found + } + } + return -(low + 1); // key not found. } /** - * Parses the byte array argument as if it was an int value and returns the - * result. 
Throws NumberFormatException if the byte array does not represent an - * int quantity. The second argument specifies the radix to use when parsing - * the value. + * Treat the byte[] as an unsigned series of bytes, most significant bits first. Start by adding + * 1 to the rightmost bit/byte and carry over all overflows to the more significant bits/bytes. * - * @param radix the base to use for conversion. - * @return the value represented by the argument - * @throws NumberFormatException if the argument could not be parsed as an int quantity. + * @param input The byte[] to increment. + * @return The incremented copy of "in". May be same length or 1 byte longer. */ - public static int parseInt(byte[] bytes, int start, int length, int radix) { - if (bytes == null) { - throw new NumberFormatException("String is null"); + public static byte[] unsignedCopyAndIncrement(final byte[] input) { + byte[] copy = copy(input); + if (copy == null) { + throw new IllegalArgumentException("cannot increment null array"); } - if (radix < Character.MIN_RADIX || radix > Character.MAX_RADIX) { - throw new NumberFormatException("Invalid radix: " + radix); - } - if (length == 0) { - throw new NumberFormatException("Empty byte array!"); - } - int offset = start; - boolean negative = bytes[start] == '-'; - if (negative || bytes[start] == '+') { - offset++; - if (length == 1) { - throw new NumberFormatException(new String(bytes, start, - length)); + for (int i = copy.length - 1; i >= 0; --i) { + if (copy[i] == -1) {// -1 is all 1-bits, which is the unsigned maximum + copy[i] = 0; + } else { + ++copy[i]; + return copy; } } - - return parse(bytes, start, length, offset, radix, negative); + // we maxed out the array + byte[] out = new byte[copy.length + 1]; + out[0] = 1; + System.arraycopy(copy, 0, out, 1, copy.length); + return out; } - /** - * @param bytes - * @param start - * @param length - * @param radix the base to use for conversion. 
- * @param offset the starting position after the sign (if exists) - * @param radix the base to use for conversion. - * @param negative whether the number is negative. - * @return the value represented by the argument - * @throws NumberFormatException if the argument could not be parsed as an int quantity. - */ - private static int parse(byte[] bytes, int start, int length, int offset, - int radix, boolean negative) { - byte separator = '.'; - int max = Integer.MIN_VALUE / radix; - int result = 0, end = start + length; - while (offset < end) { - int digit = digit(bytes[offset++], radix); - if (digit == -1) { - if (bytes[offset - 1] == separator) { - // We allow decimals and will return a truncated integer in that case. - // Therefore we won't throw an exception here (checking the fractional - // part happens below.) - break; - } - throw new NumberFormatException(new String(bytes, start, - length)); - } - if (max > result) { - throw new NumberFormatException(new String(bytes, start, - length)); - } - int next = result * radix - digit; - if (next > result) { - throw new NumberFormatException(new String(bytes, start, - length)); + public static boolean equals(List a, List b) { + if (a == null) { + if (b == null) { + return true; } - result = next; + return false; } - - // This is the case when we've encountered a decimal separator. The fractional - // part will not change the number, but we will verify that the fractional part - // is well formed. 
- while (offset < end) { - int digit = digit(bytes[offset++], radix); - if (digit == -1) { - throw new NumberFormatException(new String(bytes, start, - length)); - } + if (b == null) { + return false; } - - if (!negative) { - result = -result; - if (result < 0) { - throw new NumberFormatException(new String(bytes, start, - length)); + if (a.size() != b.size()) { + return false; + } + for (int i = 0; i < a.size(); ++i) { + if (!Bytes.equals(a.get(i), b.get(i))) { + return false; } } - return result; + return true; } - /** - * Returns the digit represented by character b. + * Returns the index of the first appearance of the value {@code target} in + * {@code array}. * - * @param b The ascii code of the character - * @param radix The radix - * @return -1 if it's invalid - */ - private static int digit(int b, int radix) { - int r = -1; - if (b >= '0' && b <= '9') { - r = b - '0'; - } else if (b >= 'A' && b <= 'Z') { - r = b - 'A' + 10; - } else if (b >= 'a' && b <= 'z') { - r = b - 'a' + 10; - } - if (r >= radix) { - r = -1; + * @param array an array of {@code byte} values, possibly empty + * @param target a primitive {@code byte} value + * @return the least index {@code i} for which {@code array[i] == target}, or + * {@code -1} if no such index exists. + */ + public static int indexOf(byte[] array, byte target) { + for (int i = 0; i < array.length; i++) { + if (array[i] == target) { + return i; + } } - return r; + return -1; } /** - * Returns the digit represented by character b, radix is 10 + * Returns the start position of the first occurrence of the specified {@code + * target} within {@code array}, or {@code -1} if there is no such occurrence. * - * @param b The ascii code of the character - * @return -1 if it's invalid - */ - private static boolean isDigit(int b) { - return (b >= '0' && b <= '9'); - } - - private static final int maxExponent = 511; /* Largest possible base 10 exponent. 
Any - * exponent larger than this will already - * produce underflow or overflow, so there's - * no need to worry about additional digits. - */ - public static final double powersOf10[] = { /* Table giving binary powers of 10. Entry */ - 10., /* is 10^2^i. Used to convert decimal */ - 100., /* exponents into floating-point numbers. */ - 1.0e4, - 1.0e8, - 1.0e16, - 1.0e32, - 1.0e64, - 1.0e128, - 1.0e256 - }; - - /** - * Parses the byte array argument as if it was a double value and returns the - * result. Throws NumberFormatException if the byte array does not represent a - * double value. + *

More formally, returns the lowest index {@code i} such that {@code + * java.util.Arrays.copyOfRange(array, i, i + target.length)} contains exactly + * the same elements as {@code target}. * - * @return double, the value represented by the argument - * @throws NumberFormatException if the argument could not be parsed as a double + * @param array the array to search for the sequence {@code target} + * @param target the array to search for as a sub-sequence of {@code array} */ - public static double parseDouble(byte[] bytes, int start, int length) { - if (bytes == null) { - throw new NumberFormatException("String is null"); - } - if (length == 0) { - throw new NumberFormatException("Empty byte array!"); - } - - /* - * Strip off leading blanks - */ - int offset = start; - int end = start + length; - - while (offset < end && bytes[offset] == ' ') { - offset++; - } - if (offset == end) { - throw new NumberFormatException("blank byte array!"); + public static int indexOf(byte[] array, byte[] target) { + checkNotNull(array, "array"); + checkNotNull(target, "target"); + if (target.length == 0) { + return 0; } - /* - * check for a sign. - */ - boolean sign = false; - if (bytes[offset] == '-') { - sign = true; - offset++; - } else if (bytes[offset] == '+') { - offset++; - } - if (offset == end) { - throw new NumberFormatException("the byte array only has a sign!"); - } - - /* - * Count the number of digits in the mantissa (including the decimal - * point), and also locate the decimal point. - */ - int mantSize = 0; /* Number of digits in mantissa. */ - int decicalOffset = -1; /* Number of mantissa digits BEFORE decimal point. 
*/ - for (; offset < end; offset++) { - if (!isDigit(bytes[offset])) { - if ((bytes[offset] != '.') || (decicalOffset >= 0)) { - break; + outer: + for (int i = 0; i < array.length - target.length + 1; i++) { + for (int j = 0; j < target.length; j++) { + if (array[i + j] != target[j]) { + continue outer; } - decicalOffset = mantSize; } - mantSize++; + return i; } + return -1; + } - int exponentOffset = offset; /* Temporarily holds location of exponent in bytes. */ + /** + * @param array an array of {@code byte} values, possibly empty + * @param target a primitive {@code byte} value + * @return {@code true} if {@code target} is present as an element anywhere in {@code array}. + */ + public static boolean contains(byte[] array, byte target) { + return indexOf(array, target) > -1; + } - /* - * Now suck up the digits in the mantissa. Use two integers to - * collect 9 digits each (this is faster than using floating-point). - * If the mantissa has more than 18 digits, ignore the extras, since - * they can't affect the value anyway. - */ - offset -= mantSize; - if (decicalOffset < 0) { - decicalOffset = mantSize; - } else { - mantSize -= 1; /* One of the digits was the decimal point. */ - } - int fracExponent; /* Exponent that derives from the fractional - * part. Under normal circumstatnces, it is - * the negative of the number of digits in F. - * However, if I is very long, the last digits - * of I get dropped (otherwise a long I with a - * large negative exponent could cause an - * unnecessary overflow on I alone). In this - * case, fracExp is incremented one for each - * dropped digit. 
*/ - if (mantSize > 18) { - fracExponent = decicalOffset - 18; - mantSize = 18; - } else { - fracExponent = decicalOffset - mantSize; - } + /** + * @param array an array of {@code byte} values, possibly empty + * @param target an array of {@code byte} + * @return {@code true} if {@code target} is present anywhere in {@code array} + */ + public static boolean contains(byte[] array, byte[] target) { + return indexOf(array, target) > -1; + } - if (mantSize == 0) { - return 0.0; - } + /** + * Fill given array with zeros. + * @param b array which needs to be filled with zeros + */ + public static void zero(byte[] b) { + zero(b, 0, b.length); + } - int frac1 = 0; - for (; mantSize > 9; mantSize--) { - int b = bytes[offset]; - offset++; - if (b == '.') { - b = bytes[offset]; - offset++; - } - frac1 = 10 * frac1 + (b - '0'); - } - int frac2 = 0; - for (; mantSize > 0; mantSize--) { - int b = bytes[offset]; - offset++; - if (b == '.') { - b = bytes[offset]; - offset++; - } - frac2 = 10 * frac2 + (b - '0'); - } - double fraction = (1.0e9 * frac1) + frac2; + /** + * Fill given array with zeros at the specified position. + * @param b + * @param offset + * @param length + */ + public static void zero(byte[] b, int offset, int length) { + checkPositionIndex(offset, b.length, "offset"); + checkArgument(length > 0, "length must be greater than 0"); + checkPositionIndex(offset + length, b.length, "offset + length"); + Arrays.fill(b, offset, offset + length, (byte) 0); + } - /* - * Skim off the exponent. - */ - int exponent = 0; /* Exponent read from "EX" field. */ - offset = exponentOffset; - boolean expSign = false; - - if (offset < end) { - if ((bytes[offset] != 'E') && (bytes[offset] != 'e')) { - throw new NumberFormatException(new String(bytes, start, - length)); - } + private static final SecureRandom RNG = new SecureRandom(); - // (bytes[offset] == 'E') || (bytes[offset] == 'e') - offset++; + /** + * Fill given array with random bytes. 
+ * @param b array which needs to be filled with random bytes + */ + public static void random(byte[] b) { + RNG.nextBytes(b); + } - if (bytes[offset] == '-') { - expSign = true; - offset++; - } else if (bytes[offset] == '+') { - offset++; - } + /** + * Fill given array with random bytes at the specified position. + * @param b + * @param offset + * @param length + */ + public static void random(byte[] b, int offset, int length) { + checkPositionIndex(offset, b.length, "offset"); + checkArgument(length > 0, "length must be greater than 0"); + checkPositionIndex(offset + length, b.length, "offset + length"); + byte[] buf = new byte[length]; + RNG.nextBytes(buf); + System.arraycopy(buf, 0, b, offset, length); + } - for (; offset < end; offset++) { - if (isDigit(bytes[offset])) { - exponent = exponent * 10 + (bytes[offset] - '0'); - } else { - throw new NumberFormatException(new String(bytes, start, - length)); - } - } + /** + * Create a max byte array with the specified max byte count + * @param maxByteCount the length of returned byte array + * @return the created max byte array + */ + public static byte[] createMaxByteArray(int maxByteCount) { + byte[] maxByteArray = new byte[maxByteCount]; + for (int i = 0; i < maxByteArray.length; i++) { + maxByteArray[i] = (byte) 0xff; } + return maxByteArray; + } - exponent = expSign ? (fracExponent - exponent) : (fracExponent + exponent); - - /* - * Generate a floating-point number that represents the exponent. - * Do this by processing the exponent one bit at a time to combine - * many powers of 2 of 10. Then combine the exponent with the - * fraction. 
- */ - if (exponent < 0) { - expSign = true; - exponent = -exponent; - } else { - expSign = false; - } - if (exponent > maxExponent) { - throw new NumberFormatException(new String(bytes, start, - length)); + /** + * Create a byte array which is multiple given bytes + * @param srcBytes + * @param multiNum + * @return byte array + */ + public static byte[] multiple(byte[] srcBytes, int multiNum) { + if (multiNum <= 0) { + return new byte[0]; } - - double dblExp = 1.0; - for (int i = 0; exponent != 0; exponent >>= 1, i++) { - if ((exponent & 01) == 01) { - dblExp *= powersOf10[i]; - } + byte[] result = new byte[srcBytes.length * multiNum]; + for (int i = 0; i < multiNum; i++) { + System.arraycopy(srcBytes, 0, result, i * srcBytes.length, + srcBytes.length); } + return result; + } - fraction = (expSign) ? (fraction / dblExp) : (fraction * dblExp); - - return sign ? (-fraction) : fraction; + /** + * Convert a byte array into a hex string + * @param b + */ + public static String toHex(byte[] b) { + checkArgument(b.length > 0, "length must be greater than 0"); + return String.format("%x", new BigInteger(1, b)); } + /** + * Create a byte array from a string of hash digits. 
The length of the + * string must be a multiple of 2 + * @param hex + */ + public static byte[] fromHex(String hex) { + checkArgument(hex.length() > 0, "length must be greater than 0"); + checkArgument(hex.length() % 2 == 0, "length must be a multiple of 2"); + // Make sure letters are upper case + hex = hex.toUpperCase(); + byte[] b = new byte[hex.length() / 2]; + for (int i = 0; i < b.length; i++) { + b[i] = (byte)((toBinaryFromHex((byte)hex.charAt(2 * i)) << 4) + + toBinaryFromHex((byte)hex.charAt((2 * i + 1)))); + } + return b; + } } diff --git a/tajo-common/src/main/java/org/apache/tajo/util/BytesUtils.java b/tajo-common/src/main/java/org/apache/tajo/util/BytesUtils.java new file mode 100644 index 0000000000..5f309c2f22 --- /dev/null +++ b/tajo-common/src/main/java/org/apache/tajo/util/BytesUtils.java @@ -0,0 +1,182 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tajo.util; + +import org.apache.hadoop.io.WritableUtils; + +import java.io.ByteArrayOutputStream; +import java.util.ArrayList; +import java.util.List; + +/** + * Extra utilities for bytes + */ +public class BytesUtils { + /** + * @param n Long to make a VLong of. + * @return VLong as bytes array. 
+ */ + public static byte[] vlongToBytes(long n) { + byte [] result; + int offset = 0; + if (n >= -112 && n <= 127) { + result = new byte[1]; + result[offset] = (byte) n; + return result; + } + + int len = -112; + if (n < 0) { + n ^= -1L; // take one's complement' + len = -120; + } + + long tmp = n; + while (tmp != 0) { + tmp = tmp >> 8; + len--; + } + + int size = WritableUtils.decodeVIntSize((byte) len); + + result = new byte[size]; + result[offset++] = (byte) len; + len = (len < -120) ? -(len + 120) : -(len + 112); + + for (int idx = len; idx != 0; idx--) { + int shiftbits = (idx - 1) * 8; + long mask = 0xFFL << shiftbits; + result[offset++] = (byte)((n & mask) >> shiftbits); + } + return result; + } + + public static void writeVLong(ByteArrayOutputStream byteStream, long l) { + byte[] vLongBytes = vlongToBytes(l); + byteStream.write(vLongBytes, 0, vLongBytes.length); + } + + /** + * Converts a char array to a ascii byte array. + * + * @param chars string + * @return the byte array + */ + static byte[] toASCIIBytes(char[] chars) { + byte[] buffer = new byte[chars.length]; + for (int i = 0; i < chars.length; i++) { + buffer[i] = (byte) chars[i]; + } + return buffer; + } + + public static byte[][] splitPreserveAllTokens(byte[] str, char separatorChar, int[] target) { + return splitWorker(str, 0, -1, separatorChar, true, target); + } + + public static byte[][] splitPreserveAllTokens(byte[] str, int offset, int length, char separatorChar, int[] target) { + return splitWorker(str, offset, length, separatorChar, true, target); + } + + public static byte[][] splitPreserveAllTokens(byte[] str, char separatorChar) { + return splitWorker(str, 0, -1, separatorChar, true, null); + } + + public static byte[][] splitPreserveAllTokens(byte[] str, int length, char separatorChar) { + return splitWorker(str, 0, length, separatorChar, true, null); + } + + /** + * Performs the logic for the split and + * splitPreserveAllTokens methods that do not return a + * maximum array length. 
+ * + * @param str the String to parse, may be null + * @param length amount of bytes to str + * @param separatorChar the ascii separate character + * @param preserveAllTokens if true, adjacent separators are + * @param target the projection target + * treated as empty token separators; if false, adjacent + * separators are treated as one separator. + * @return an array of parsed Strings, null if null String input + */ + private static byte[][] splitWorker(byte[] str, int offset, int length, char separatorChar, + boolean preserveAllTokens, int[] target) { + // Performance tuned for 2.0 (JDK1.4) + + if (str == null) { + return null; + } + int len = length; + if (len == 0) { + return new byte[1][0]; + }else if(len < 0){ + len = str.length - offset; + } + + List list = new ArrayList(); + int i = 0, start = 0; + boolean match = false; + boolean lastMatch = false; + int currentTarget = 0; + int currentIndex = 0; + while (i < len) { + if (str[i + offset] == separatorChar) { + if (match || preserveAllTokens) { + if (target == null) { + byte[] bytes = new byte[i - start]; + System.arraycopy(str, start + offset, bytes, 0, bytes.length); + list.add(bytes); + } else if (target.length > currentTarget && currentIndex == target[currentTarget]) { + byte[] bytes = new byte[i - start]; + System.arraycopy(str, start + offset, bytes, 0, bytes.length); + list.add(bytes); + currentTarget++; + } else { + list.add(null); + } + currentIndex++; + match = false; + lastMatch = true; + } + start = ++i; + continue; + } + lastMatch = false; + match = true; + i++; + } + if (match || (preserveAllTokens && lastMatch)) { + if (target == null) { + byte[] bytes = new byte[i - start]; + System.arraycopy(str, start + offset, bytes, 0, bytes.length); + list.add(bytes); + } else if (target.length > currentTarget && currentIndex == target[currentTarget]) { + byte[] bytes = new byte[i - start]; + System.arraycopy(str, start + offset, bytes, 0, bytes.length); + list.add(bytes); //str.substring(start, i)); + 
currentTarget++; + } else { + list.add(null); + } + currentIndex++; + } + return (byte[][]) list.toArray(new byte[list.size()][]); + } +} diff --git a/tajo-common/src/main/java/org/apache/tajo/util/NumberUtil.java b/tajo-common/src/main/java/org/apache/tajo/util/NumberUtil.java index c8205eb386..d52b80497c 100644 --- a/tajo-common/src/main/java/org/apache/tajo/util/NumberUtil.java +++ b/tajo-common/src/main/java/org/apache/tajo/util/NumberUtil.java @@ -20,7 +20,24 @@ public class NumberUtil { - public static long unsigned32(int n) { + public static final double[] powersOf10 = { /* Table giving binary powers of 10. Entry */ + 10., /* is 10^2^i. Used to convert decimal */ + 100., /* exponents into floating-point numbers. */ + 1.0e4, + 1.0e8, + 1.0e16, + 1.0e32, + 1.0e64, + 1.0e128, + 1.0e256 + }; + private static final int maxExponent = 511; /* Largest possible base 10 exponent. Any + * exponent larger than this will already + * produce underflow or overflow, so there's + * no need to worry about additional digits. 
+ */ + + public static long unsigned32(int n) { return n & 0xFFFFFFFFL; } @@ -29,27 +46,27 @@ public static int unsigned16(short n) { } public static byte[] toAsciiBytes(Number i){ - return Bytes.toASCIIBytes(String.valueOf(i).toCharArray()); + return BytesUtils.toASCIIBytes(String.valueOf(i).toCharArray()); } public static byte[] toAsciiBytes(short i){ - return Bytes.toASCIIBytes(String.valueOf(i).toCharArray()); + return BytesUtils.toASCIIBytes(String.valueOf(i).toCharArray()); } public static byte[] toAsciiBytes(int i){ - return Bytes.toASCIIBytes(String.valueOf(i).toCharArray()); + return BytesUtils.toASCIIBytes(String.valueOf(i).toCharArray()); } public static byte[] toAsciiBytes(long i){ - return Bytes.toASCIIBytes(String.valueOf(i).toCharArray()); + return BytesUtils.toASCIIBytes(String.valueOf(i).toCharArray()); } public static byte[] toAsciiBytes(float i){ - return Bytes.toASCIIBytes(String.valueOf(i).toCharArray()); + return BytesUtils.toASCIIBytes(String.valueOf(i).toCharArray()); } public static byte[] toAsciiBytes(double i){ - return Bytes.toASCIIBytes(String.valueOf(i).toCharArray()); + return BytesUtils.toASCIIBytes(String.valueOf(i).toCharArray()); } private static void benchmark(int num){ @@ -75,7 +92,320 @@ private static void benchmark(int num){ + " ms, " + "Total: " + size / (1024 * 1024) + "MB"); } - public static void main(String[] args) throws Exception { - benchmark(1024 * 1024 * 10); + /** + * Returns the digit represented by character b. 
+ * + * @param b The ascii code of the character + * @param radix The radix + * @return -1 if it's invalid + */ + static int digit(int b, int radix) { + int r = -1; + if (b >= '0' && b <= '9') { + r = b - '0'; + } else if (b >= 'A' && b <= 'Z') { + r = b - 'A' + 10; + } else if (b >= 'a' && b <= 'z') { + r = b - 'a' + 10; + } + if (r >= radix) { + r = -1; + } + return r; + } + + /** + * Returns the digit represented by character b, radix is 10 + * + * @param b The ascii code of the character + * @return -1 if it's invalid + */ + private static boolean isDigit(int b) { + return (b >= '0' && b <= '9'); + } + + /** + * Parses the byte array argument as if it was a double value and returns the + * result. Throws NumberFormatException if the byte array does not represent a + * double value. + * + * @return double, the value represented by the argument + * @throws NumberFormatException if the argument could not be parsed as a double + */ + public static double parseDouble(byte[] bytes, int start, int length) { + if (bytes == null) { + throw new NumberFormatException("String is null"); + } + if (length == 0) { + throw new NumberFormatException("Empty byte array!"); + } + + /* + * Strip off leading blanks + */ + int offset = start; + int end = start + length; + + while (offset < end && bytes[offset] == ' ') { + offset++; + } + if (offset == end) { + throw new NumberFormatException("blank byte array!"); + } + + /* + * check for a sign. + */ + boolean sign = false; + if (bytes[offset] == '-') { + sign = true; + offset++; + } else if (bytes[offset] == '+') { + offset++; + } + if (offset == end) { + throw new NumberFormatException("the byte array only has a sign!"); + } + + /* + * Count the number of digits in the mantissa (including the decimal + * point), and also locate the decimal point. + */ + int mantSize = 0; /* Number of digits in mantissa. */ + int decicalOffset = -1; /* Number of mantissa digits BEFORE decimal point. 
*/ + for (; offset < end; offset++) { + if (!isDigit(bytes[offset])) { + if ((bytes[offset] != '.') || (decicalOffset >= 0)) { + break; + } + decicalOffset = mantSize; + } + mantSize++; + } + + int exponentOffset = offset; /* Temporarily holds location of exponent in bytes. */ + + /* + * Now suck up the digits in the mantissa. Use two integers to + * collect 9 digits each (this is faster than using floating-point). + * If the mantissa has more than 18 digits, ignore the extras, since + * they can't affect the value anyway. + */ + offset -= mantSize; + if (decicalOffset < 0) { + decicalOffset = mantSize; + } else { + mantSize -= 1; /* One of the digits was the decimal point. */ + } + int fracExponent; /* Exponent that derives from the fractional + * part. Under normal circumstatnces, it is + * the negative of the number of digits in F. + * However, if I is very long, the last digits + * of I get dropped (otherwise a long I with a + * large negative exponent could cause an + * unnecessary overflow on I alone). In this + * case, fracExp is incremented one for each + * dropped digit. */ + if (mantSize > 18) { + fracExponent = decicalOffset - 18; + mantSize = 18; + } else { + fracExponent = decicalOffset - mantSize; + } + + if (mantSize == 0) { + return 0.0; + } + + int frac1 = 0; + for (; mantSize > 9; mantSize--) { + int b = bytes[offset]; + offset++; + if (b == '.') { + b = bytes[offset]; + offset++; + } + frac1 = 10 * frac1 + (b - '0'); + } + int frac2 = 0; + for (; mantSize > 0; mantSize--) { + int b = bytes[offset]; + offset++; + if (b == '.') { + b = bytes[offset]; + offset++; + } + frac2 = 10 * frac2 + (b - '0'); + } + double fraction = (1.0e9 * frac1) + frac2; + + /* + * Skim off the exponent. + */ + int exponent = 0; /* Exponent read from "EX" field. 
*/ + offset = exponentOffset; + boolean expSign = false; + + if (offset < end) { + if ((bytes[offset] != 'E') && (bytes[offset] != 'e')) { + throw new NumberFormatException(new String(bytes, start, + length)); + } + + // (bytes[offset] == 'E') || (bytes[offset] == 'e') + offset++; + + if (bytes[offset] == '-') { + expSign = true; + offset++; + } else if (bytes[offset] == '+') { + offset++; + } + + for (; offset < end; offset++) { + if (isDigit(bytes[offset])) { + exponent = exponent * 10 + (bytes[offset] - '0'); + } else { + throw new NumberFormatException(new String(bytes, start, + length)); + } + } + } + + exponent = expSign ? (fracExponent - exponent) : (fracExponent + exponent); + + /* + * Generate a floating-point number that represents the exponent. + * Do this by processing the exponent one bit at a time to combine + * many powers of 2 of 10. Then combine the exponent with the + * fraction. + */ + if (exponent < 0) { + expSign = true; + exponent = -exponent; + } else { + expSign = false; + } + if (exponent > maxExponent) { + throw new NumberFormatException(new String(bytes, start, + length)); + } + + double dblExp = 1.0; + for (int i = 0; exponent != 0; exponent >>= 1, i++) { + if ((exponent & 01) == 01) { + dblExp *= powersOf10[i]; + } + } + + fraction = (expSign) ? (fraction / dblExp) : (fraction * dblExp); + + return sign ? (-fraction) : fraction; + } + + /** + * Parses the byte array argument as if it was an int value and returns the + * result. Throws NumberFormatException if the byte array does not represent an + * int quantity. + * + * @return int the value represented by the argument + * @throws NumberFormatException if the argument could not be parsed as an int quantity. + */ + public static int parseInt(byte[] bytes, int start, int length) { + return parseInt(bytes, start, length, 10); + } + + /** + * Parses the byte array argument as if it was an int value and returns the + * result. 
Throws NumberFormatException if the byte array does not represent an + * int quantity. The second argument specifies the radix to use when parsing + * the value. + * + * @param radix the base to use for conversion. + * @return the value represented by the argument + * @throws NumberFormatException if the argument could not be parsed as an int quantity. + */ + public static int parseInt(byte[] bytes, int start, int length, int radix) { + if (bytes == null) { + throw new NumberFormatException("String is null"); + } + if (radix < Character.MIN_RADIX || radix > Character.MAX_RADIX) { + throw new NumberFormatException("Invalid radix: " + radix); + } + if (length == 0) { + throw new NumberFormatException("Empty byte array!"); + } + int offset = start; + boolean negative = bytes[start] == '-'; + if (negative || bytes[start] == '+') { + offset++; + if (length == 1) { + throw new NumberFormatException(new String(bytes, start, + length)); + } + } + + return parseIntInternal(bytes, start, length, offset, radix, negative); + } + + /** + * @param bytes + * @param start + * @param length + * @param radix the base to use for conversion. + * @param offset the starting position after the sign (if exists) + * @param radix the base to use for conversion. + * @param negative whether the number is negative. + * @return the value represented by the argument + * @throws NumberFormatException if the argument could not be parsed as an int quantity. + */ + private static int parseIntInternal(byte[] bytes, int start, int length, int offset, + int radix, boolean negative) { + byte separator = '.'; + int max = Integer.MIN_VALUE / radix; + int result = 0, end = start + length; + while (offset < end) { + int digit = digit(bytes[offset++], radix); + if (digit == -1) { + if (bytes[offset - 1] == separator) { + // We allow decimals and will return a truncated integer in that case. + // Therefore we won't throw an exception here (checking the fractional + // part happens below.) 
+ break; + } + throw new NumberFormatException(new String(bytes, start, + length)); + } + if (max > result) { + throw new NumberFormatException(new String(bytes, start, + length)); + } + int next = result * radix - digit; + if (next > result) { + throw new NumberFormatException(new String(bytes, start, + length)); + } + result = next; + } + + // This is the case when we've encountered a decimal separator. The fractional + // part will not change the number, but we will verify that the fractional part + // is well formed. + while (offset < end) { + int digit = digit(bytes[offset++], radix); + if (digit == -1) { + throw new NumberFormatException(new String(bytes, start, + length)); + } + } + + if (!negative) { + result = -result; + if (result < 0) { + throw new NumberFormatException(new String(bytes, start, + length)); + } + } + return result; } } diff --git a/tajo-common/src/test/java/org/apache/tajo/util/TestBytes.java b/tajo-common/src/test/java/org/apache/tajo/util/TestBytes.java deleted file mode 100644 index c26bf7d10f..0000000000 --- a/tajo-common/src/test/java/org/apache/tajo/util/TestBytes.java +++ /dev/null @@ -1,111 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.tajo.util; - -import org.apache.commons.lang.StringUtils; -import org.junit.Test; - -import static org.junit.Assert.*; - -public class TestBytes { - - @Test - public void testAsciiBytes() { - String asciiText = "abcde 12345 ABCDE"; - assertArrayEquals(asciiText.getBytes(), Bytes.toASCIIBytes(asciiText.toCharArray())); - } - - @Test - public void testSplitBytes() { - String text = "abcde|12345|ABCDE"; - char separatorChar = '|'; - - String[] textArray = StringUtils.splitPreserveAllTokens(text, separatorChar); - byte[][] bytesArray = Bytes.splitPreserveAllTokens(text.getBytes(), separatorChar); - - assertEquals(textArray.length, bytesArray.length); - for (int i = 0; i < textArray.length; i++){ - assertArrayEquals(textArray[i].getBytes(), bytesArray[i]); - } - } - - @Test - public void testSplitProjectionBytes() { - String text = "abcde|12345|ABCDE"; - int[] target = new int[]{ 1 }; - char separatorChar = '|'; - - String[] textArray = StringUtils.splitPreserveAllTokens(text, separatorChar); - byte[][] bytesArray = Bytes.splitPreserveAllTokens(text.getBytes(), separatorChar, target); - - assertEquals(textArray.length, bytesArray.length); - - assertNull(bytesArray[0]); - assertNotNull(bytesArray[1]); - assertArrayEquals(textArray[1].getBytes(), bytesArray[1]); - assertNull(bytesArray[2]); - } - - @Test - public void testParseInt() { - int int1 = 0; - byte[] bytes1 = Double.toString(int1).getBytes(); - assertEquals(int1, Bytes.parseInt(bytes1, 0, bytes1.length)); - - int int2 = -7; - byte[] bytes2 = Double.toString(int2).getBytes(); - assertEquals(int2, Bytes.parseInt(bytes2, 0, bytes2.length)); - - int int3 = +128; - byte[] bytes3 = Double.toString(int3).getBytes(); - assertEquals(int3, Bytes.parseInt(bytes3, 0, bytes3.length)); - - int int4 = 4; - byte[] bytes4 = Double.toString(int4).getBytes(); - assertEquals(int4, Bytes.parseInt(bytes4, 0, bytes4.length)); - - byte[] bytes5 = "0123-456789".getBytes(); - assertEquals(-456, 
Bytes.parseInt(bytes5, 4, 4)); - - } - - @Test - public void testParseDouble() { - double double1 = 2.0015E7; - byte[] bytes1 = Double.toString(double1).getBytes(); - assertEquals(double1, Bytes.parseDouble(bytes1, 0, bytes1.length), 0.0); - - double double2 = 1.345E-7; - byte[] bytes2 = Double.toString(double2).getBytes(); - assertEquals(double2, Bytes.parseDouble(bytes2, 0, bytes2.length), 0.0); - - double double3 = -1.345E-7; - byte[] bytes3 = Double.toString(double3).getBytes(); - assertEquals(double3, Bytes.parseDouble(bytes3, 0, bytes3.length), 0.0); - - double double4 = 4; - byte[] bytes4 = Double.toString(double4).getBytes(); - assertEquals(double4, Bytes.parseDouble(bytes4, 0, bytes4.length), 0.0); - - byte[] bytes5 = "0123456789.012345E012345".getBytes(); - assertEquals(6789.012345E01, Bytes.parseDouble(bytes5, 6, 14), 0.0); - - } - -} diff --git a/tajo-common/src/test/java/org/apache/tajo/util/TestNumberUtil.java b/tajo-common/src/test/java/org/apache/tajo/util/TestNumberUtil.java index 25851cda36..e4e3b62ee4 100644 --- a/tajo-common/src/test/java/org/apache/tajo/util/TestNumberUtil.java +++ b/tajo-common/src/test/java/org/apache/tajo/util/TestNumberUtil.java @@ -23,9 +23,9 @@ import java.util.Random; import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; public class TestNumberUtil { - @Test public void testNumberToAsciiBytes() { Random r = new Random(System.currentTimeMillis()); @@ -45,4 +45,49 @@ public void testNumberToAsciiBytes() { n = r.nextDouble(); assertArrayEquals(String.valueOf(n.doubleValue()).getBytes(), NumberUtil.toAsciiBytes(n.doubleValue())); } + + @Test + public void testParseInt() { + int int1 = 0; + byte[] bytes1 = Double.toString(int1).getBytes(); + assertEquals(int1, NumberUtil.parseInt(bytes1, 0, bytes1.length)); + + int int2 = -7; + byte[] bytes2 = Double.toString(int2).getBytes(); + assertEquals(int2, NumberUtil.parseInt(bytes2, 0, bytes2.length)); + + int int3 = +128; + byte[] bytes3 = 
Double.toString(int3).getBytes(); + assertEquals(int3, NumberUtil.parseInt(bytes3, 0, bytes3.length)); + + int int4 = 4; + byte[] bytes4 = Double.toString(int4).getBytes(); + assertEquals(int4, NumberUtil.parseInt(bytes4, 0, bytes4.length)); + + byte[] bytes5 = "0123-456789".getBytes(); + assertEquals(-456, NumberUtil.parseInt(bytes5, 4, 4)); + + } + + @Test + public void testParseDouble() { + double double1 = 2.0015E7; + byte[] bytes1 = Double.toString(double1).getBytes(); + assertEquals(double1, NumberUtil.parseDouble(bytes1, 0, bytes1.length), 0.0); + + double double2 = 1.345E-7; + byte[] bytes2 = Double.toString(double2).getBytes(); + assertEquals(double2, NumberUtil.parseDouble(bytes2, 0, bytes2.length), 0.0); + + double double3 = -1.345E-7; + byte[] bytes3 = Double.toString(double3).getBytes(); + assertEquals(double3, NumberUtil.parseDouble(bytes3, 0, bytes3.length), 0.0); + + double double4 = 4; + byte[] bytes4 = Double.toString(double4).getBytes(); + assertEquals(double4, NumberUtil.parseDouble(bytes4, 0, bytes4.length), 0.0); + + byte[] bytes5 = "0123456789.012345E012345".getBytes(); + assertEquals(6789.012345E01, NumberUtil.parseDouble(bytes5, 6, 14), 0.0); + } } diff --git a/tajo-common/src/test/java/org/apache/tajo/util/TestStringUtil.java b/tajo-common/src/test/java/org/apache/tajo/util/TestStringUtil.java index 5c13f8fc1a..6c732c7fc4 100644 --- a/tajo-common/src/test/java/org/apache/tajo/util/TestStringUtil.java +++ b/tajo-common/src/test/java/org/apache/tajo/util/TestStringUtil.java @@ -22,8 +22,7 @@ import org.apache.commons.lang.StringEscapeUtils; import org.junit.Test; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotEquals; +import static org.junit.Assert.*; public class TestStringUtil { @@ -89,4 +88,41 @@ public void testVariousDelimiter() { assertEquals("\\u0020", StringUtils.unicodeEscapedDelimiter(spaceDelimiter)); assertEquals(spaceDelimiter, 
StringEscapeUtils.unescapeJava(StringUtils.unicodeEscapedDelimiter(spaceDelimiter))); } + + @Test + public void testAsciiBytes() { + String asciiText = "abcde 12345 ABCDE"; + assertArrayEquals(asciiText.getBytes(), BytesUtils.toASCIIBytes(asciiText.toCharArray())); + } + + @Test + public void testSplitBytes() { + String text = "abcde|12345|ABCDE"; + char separatorChar = '|'; + + String[] textArray = org.apache.commons.lang.StringUtils.splitPreserveAllTokens(text, separatorChar); + byte[][] bytesArray = BytesUtils.splitPreserveAllTokens(text.getBytes(), separatorChar); + + assertEquals(textArray.length, bytesArray.length); + for (int i = 0; i < textArray.length; i++){ + assertArrayEquals(textArray[i].getBytes(), bytesArray[i]); + } + } + + @Test + public void testSplitProjectionBytes() { + String text = "abcde|12345|ABCDE"; + int[] target = new int[]{ 1 }; + char separatorChar = '|'; + + String[] textArray = org.apache.commons.lang.StringUtils.splitPreserveAllTokens(text, separatorChar); + byte[][] bytesArray = BytesUtils.splitPreserveAllTokens(text.getBytes(), separatorChar, target); + + assertEquals(textArray.length, bytesArray.length); + + assertNull(bytesArray[0]); + assertNotNull(bytesArray[1]); + assertArrayEquals(textArray[1].getBytes(), bytesArray[1]); + assertNull(bytesArray[2]); + } } diff --git a/tajo-core/src/test/java/org/apache/tajo/engine/eval/ExprTestBase.java b/tajo-core/src/test/java/org/apache/tajo/engine/eval/ExprTestBase.java index 0742a8089f..ad80ddfafe 100644 --- a/tajo-core/src/test/java/org/apache/tajo/engine/eval/ExprTestBase.java +++ b/tajo-core/src/test/java/org/apache/tajo/engine/eval/ExprTestBase.java @@ -38,10 +38,10 @@ import org.apache.tajo.storage.LazyTuple; import org.apache.tajo.storage.Tuple; import org.apache.tajo.storage.VTuple; -import org.apache.tajo.util.Bytes; +import org.apache.tajo.util.BytesUtils; import org.apache.tajo.util.CommonTestingUtil; -import org.apache.tajo.util.datetime.DateTimeUtil; import 
org.apache.tajo.util.KeyValueSet; +import org.apache.tajo.util.datetime.DateTimeUtil; import org.junit.AfterClass; import org.junit.BeforeClass; @@ -170,7 +170,7 @@ public void testEval(Schema schema, String tableName, String csvTuple, String qu } lazyTuple = - new LazyTuple(inputSchema, Bytes.splitPreserveAllTokens(csvTuple.getBytes(), delimiter, targetIdx),0); + new LazyTuple(inputSchema, BytesUtils.splitPreserveAllTokens(csvTuple.getBytes(), delimiter, targetIdx),0); vtuple = new VTuple(inputSchema.size()); for (int i = 0; i < inputSchema.size(); i++) { // If null value occurs, null datum is manually inserted to an input tuple. diff --git a/tajo-storage/src/main/java/org/apache/tajo/storage/CSVFile.java b/tajo-storage/src/main/java/org/apache/tajo/storage/CSVFile.java index 17b92298d7..8e26ec6110 100644 --- a/tajo-storage/src/main/java/org/apache/tajo/storage/CSVFile.java +++ b/tajo-storage/src/main/java/org/apache/tajo/storage/CSVFile.java @@ -40,7 +40,7 @@ import org.apache.tajo.storage.exception.AlreadyExistsStorageException; import org.apache.tajo.storage.fragment.FileFragment; import org.apache.tajo.storage.rcfile.NonSyncByteArrayOutputStream; -import org.apache.tajo.util.Bytes; +import org.apache.tajo.util.BytesUtils; import java.io.*; import java.util.ArrayList; @@ -466,8 +466,8 @@ public Tuple next() throws IOException { offset = fileOffsets.get(currentIdx); } - byte[][] cells = Bytes.splitPreserveAllTokens(buffer.getData(), startOffsets.get(currentIdx), - rowLengthList.get(currentIdx), delimiter, targetColumnIndexes); + byte[][] cells = BytesUtils.splitPreserveAllTokens(buffer.getData(), startOffsets.get(currentIdx), + rowLengthList.get(currentIdx), delimiter, targetColumnIndexes); currentIdx++; return new LazyTuple(schema, cells, offset, nullChars, serde); } catch (Throwable t) { diff --git a/tajo-storage/src/main/java/org/apache/tajo/storage/RowFile.java b/tajo-storage/src/main/java/org/apache/tajo/storage/RowFile.java index 2068260f77..db36771835 
100644 --- a/tajo-storage/src/main/java/org/apache/tajo/storage/RowFile.java +++ b/tajo-storage/src/main/java/org/apache/tajo/storage/RowFile.java @@ -35,7 +35,6 @@ import org.apache.tajo.storage.exception.AlreadyExistsStorageException; import org.apache.tajo.storage.fragment.FileFragment; import org.apache.tajo.util.BitArray; -import org.apache.tajo.util.Bytes; import java.io.FileNotFoundException; import java.io.IOException; @@ -121,7 +120,7 @@ public void init() throws IOException { private void readHeader() throws IOException { SYNC_INTERVAL = in.readInt(); - Bytes.readFully(in, this.sync, 0, SYNC_HASH_SIZE); + StorageUtil.readFully(in, this.sync, 0, SYNC_HASH_SIZE); } /** diff --git a/tajo-storage/src/main/java/org/apache/tajo/storage/StorageUtil.java b/tajo-storage/src/main/java/org/apache/tajo/storage/StorageUtil.java index 95bb96f01a..5b2d711098 100644 --- a/tajo-storage/src/main/java/org/apache/tajo/storage/StorageUtil.java +++ b/tajo-storage/src/main/java/org/apache/tajo/storage/StorageUtil.java @@ -32,7 +32,10 @@ import parquet.hadoop.ParquetOutputFormat; import sun.nio.ch.DirectBuffer; +import java.io.DataInput; +import java.io.EOFException; import java.io.IOException; +import java.io.InputStream; import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.List; @@ -199,4 +202,42 @@ public static void closeBuffer(ByteBuffer buffer) { } } } + + public static int readFully(InputStream is, byte[] buffer, int offset, int length) + throws IOException { + int nread = 0; + while (nread < length) { + int nbytes = is.read(buffer, offset + nread, length - nread); + if (nbytes < 0) { + return nread > 0 ? nread : nbytes; + } + nread += nbytes; + } + return nread; + } + + /** + * Similar to readFully(). Skips bytes in a loop. + * @param in The DataInput to skip bytes from + * @param len number of bytes to skip. 
+ * @throws java.io.IOException if it could not skip requested number of bytes + * for any reason (including EOF) + */ + public static void skipFully(DataInput in, int len) throws IOException { + int amt = len; + while (amt > 0) { + long ret = in.skipBytes(amt); + if (ret == 0) { + // skip may return 0 even if we're not at EOF. Luckily, we can + // use the read() method to figure out if we're at the end. + int b = in.readByte(); + if (b == -1) { + throw new EOFException( "Premature EOF from inputStream after " + + "skipping " + (len - amt) + " byte(s)."); + } + ret = 1; + } + amt -= ret; + } + } } diff --git a/tajo-storage/src/main/java/org/apache/tajo/storage/TextSerializerDeserializer.java b/tajo-storage/src/main/java/org/apache/tajo/storage/TextSerializerDeserializer.java index ad732c72f6..d2ccdc7347 100644 --- a/tajo-storage/src/main/java/org/apache/tajo/storage/TextSerializerDeserializer.java +++ b/tajo-storage/src/main/java/org/apache/tajo/storage/TextSerializerDeserializer.java @@ -26,6 +26,7 @@ import org.apache.tajo.datum.*; import org.apache.tajo.datum.protobuf.ProtobufJsonFormat; import org.apache.tajo.util.Bytes; +import org.apache.tajo.util.NumberUtil; import java.io.IOException; import java.io.OutputStream; @@ -132,11 +133,11 @@ public Datum deserialize(Column col, byte[] bytes, int offset, int length, byte[ case INT1: case INT2: datum = isNull(bytes, offset, length, nullCharacters) ? NullDatum.get() - : DatumFactory.createInt2((short) Bytes.parseInt(bytes, offset, length)); + : DatumFactory.createInt2((short) NumberUtil.parseInt(bytes, offset, length)); break; case INT4: datum = isNull(bytes, offset, length, nullCharacters) ? NullDatum.get() - : DatumFactory.createInt4(Bytes.parseInt(bytes, offset, length)); + : DatumFactory.createInt4(NumberUtil.parseInt(bytes, offset, length)); break; case INT8: datum = isNull(bytes, offset, length, nullCharacters) ? 
NullDatum.get() @@ -148,7 +149,7 @@ public Datum deserialize(Column col, byte[] bytes, int offset, int length, byte[ break; case FLOAT8: datum = isNull(bytes, offset, length, nullCharacters) ? NullDatum.get() - : DatumFactory.createFloat8(Bytes.parseDouble(bytes, offset, length)); + : DatumFactory.createFloat8(NumberUtil.parseDouble(bytes, offset, length)); break; case TEXT: { byte[] chars = new byte[length]; diff --git a/tajo-storage/src/main/java/org/apache/tajo/storage/index/bst/BSTIndex.java b/tajo-storage/src/main/java/org/apache/tajo/storage/index/bst/BSTIndex.java index a7a144b3a4..5d43bd51be 100644 --- a/tajo-storage/src/main/java/org/apache/tajo/storage/index/bst/BSTIndex.java +++ b/tajo-storage/src/main/java/org/apache/tajo/storage/index/bst/BSTIndex.java @@ -30,12 +30,12 @@ import org.apache.tajo.storage.RowStoreUtil; import org.apache.tajo.storage.RowStoreUtil.RowStoreDecoder; import org.apache.tajo.storage.RowStoreUtil.RowStoreEncoder; +import org.apache.tajo.storage.StorageUtil; import org.apache.tajo.storage.Tuple; import org.apache.tajo.storage.TupleComparator; import org.apache.tajo.storage.index.IndexMethod; import org.apache.tajo.storage.index.IndexWriter; import org.apache.tajo.storage.index.OrderIndexReader; -import org.apache.tajo.util.Bytes; import java.io.Closeable; import java.io.FileNotFoundException; @@ -335,7 +335,7 @@ private void readHeader() throws IOException { // schema int schemaByteSize = indexIn.readInt(); byte [] schemaBytes = new byte[schemaByteSize]; - Bytes.readFully(indexIn, schemaBytes, 0, schemaByteSize); + StorageUtil.readFully(indexIn, schemaBytes, 0, schemaByteSize); SchemaProto.Builder builder = SchemaProto.newBuilder(); builder.mergeFrom(schemaBytes); @@ -346,7 +346,7 @@ private void readHeader() throws IOException { // comparator int compByteSize = indexIn.readInt(); byte [] compBytes = new byte[compByteSize]; - Bytes.readFully(indexIn, compBytes, 0, compByteSize); + StorageUtil.readFully(indexIn, compBytes, 0, 
compByteSize); TupleComparatorProto.Builder compProto = TupleComparatorProto.newBuilder(); compProto.mergeFrom(compBytes); @@ -358,11 +358,11 @@ private void readHeader() throws IOException { this.entryNum = indexIn.readInt(); if (entryNum > 0) { // if there is no any entry, do not read firstKey/lastKey values byte [] minBytes = new byte[indexIn.readInt()]; - Bytes.readFully(indexIn, minBytes, 0, minBytes.length); + StorageUtil.readFully(indexIn, minBytes, 0, minBytes.length); this.firstKey = rowStoreDecoder.toTuple(minBytes); byte [] maxBytes = new byte[indexIn.readInt()]; - Bytes.readFully(indexIn, maxBytes, 0, maxBytes.length); + StorageUtil.readFully(indexIn, maxBytes, 0, maxBytes.length); this.lastKey = rowStoreDecoder.toTuple(maxBytes); } } @@ -484,7 +484,7 @@ private void fillLeafIndex(int entryNum, FSDataInputStream in, long pos) for (int i = 0; i < entryNum; i++) { counter++; buf = new byte[in.readInt()]; - Bytes.readFully(in, buf, 0, buf.length); + StorageUtil.readFully(in, buf, 0, buf.length); dataSubIndex[i] = rowStoreDecoder.toTuple(buf); int offsetNum = in.readInt(); @@ -506,7 +506,7 @@ private void fillLeafIndex(int entryNum, FSDataInputStream in, long pos) byte[] buf; for (int i = 0; i < counter; i++) { buf = new byte[in.readInt()]; - Bytes.readFully(in, buf, 0, buf.length); + StorageUtil.readFully(in, buf, 0, buf.length); dataSubIndex[i] = rowStoreDecoder.toTuple(buf); int offsetNum = in.readInt(); @@ -535,7 +535,7 @@ private void fillRootIndex(int entryNum, FSDataInputStream in) byte[] buf; for (int i = 0; i < entryNum; i++) { buf = new byte[in.readInt()]; - Bytes.readFully(in, buf, 0, buf.length); + StorageUtil.readFully(in, buf, 0, buf.length); keyTuple = rowStoreDecoder.toTuple(buf); dataIndex[i] = keyTuple; this.offsetIndex[i] = in.readLong(); diff --git a/tajo-storage/src/main/java/org/apache/tajo/storage/rcfile/RCFile.java b/tajo-storage/src/main/java/org/apache/tajo/storage/rcfile/RCFile.java index 4cc37b3fb2..78498c750a 100644 --- 
a/tajo-storage/src/main/java/org/apache/tajo/storage/rcfile/RCFile.java +++ b/tajo-storage/src/main/java/org/apache/tajo/storage/rcfile/RCFile.java @@ -39,7 +39,6 @@ import org.apache.tajo.datum.NullDatum; import org.apache.tajo.storage.*; import org.apache.tajo.storage.fragment.FileFragment; -import org.apache.tajo.util.Bytes; import java.io.Closeable; import java.io.*; @@ -489,7 +488,7 @@ public void readFields(DataInput in) throws IOException { } if (skipTotal != 0) { - Bytes.skipFully(in, skipTotal); + StorageUtil.skipFully(in, skipTotal); skipTotal = 0; } @@ -528,7 +527,7 @@ public void readFields(DataInput in) throws IOException { } if (skipTotal != 0) { - Bytes.skipFully(in, skipTotal); + StorageUtil.skipFully(in, skipTotal); } } diff --git a/tajo-storage/src/main/java/org/apache/tajo/storage/sequencefile/SequenceFileAppender.java b/tajo-storage/src/main/java/org/apache/tajo/storage/sequencefile/SequenceFileAppender.java index 9eb1b2d91d..b150a9a340 100644 --- a/tajo-storage/src/main/java/org/apache/tajo/storage/sequencefile/SequenceFileAppender.java +++ b/tajo-storage/src/main/java/org/apache/tajo/storage/sequencefile/SequenceFileAppender.java @@ -43,7 +43,7 @@ import org.apache.tajo.storage.*; import org.apache.tajo.storage.exception.AlreadyExistsStorageException; import org.apache.tajo.storage.rcfile.NonSyncByteArrayOutputStream; -import org.apache.tajo.util.Bytes; +import org.apache.tajo.util.BytesUtils; import java.io.FileNotFoundException; import java.io.IOException; @@ -177,16 +177,16 @@ public void addTuple(Tuple tuple) throws IOException { switch (schema.getColumn(j).getDataType().getType()) { case TEXT: - Bytes.writeVLong(os, datum.asTextBytes().length); + BytesUtils.writeVLong(os, datum.asTextBytes().length); break; case PROTOBUF: ProtobufDatum protobufDatum = (ProtobufDatum) datum; - Bytes.writeVLong(os, protobufDatum.asByteArray().length); + BytesUtils.writeVLong(os, protobufDatum.asByteArray().length); break; case CHAR: case INET4: case BLOB: - 
Bytes.writeVLong(os, datum.asByteArray().length); + BytesUtils.writeVLong(os, datum.asByteArray().length); break; default: } diff --git a/tajo-storage/src/main/java/org/apache/tajo/storage/sequencefile/SequenceFileScanner.java b/tajo-storage/src/main/java/org/apache/tajo/storage/sequencefile/SequenceFileScanner.java index ccf3d9ed03..32d1d57688 100644 --- a/tajo-storage/src/main/java/org/apache/tajo/storage/sequencefile/SequenceFileScanner.java +++ b/tajo-storage/src/main/java/org/apache/tajo/storage/sequencefile/SequenceFileScanner.java @@ -36,7 +36,7 @@ import org.apache.tajo.datum.NullDatum; import org.apache.tajo.storage.*; import org.apache.tajo.storage.fragment.FileFragment; -import org.apache.tajo.util.Bytes; +import org.apache.tajo.util.BytesUtils; import java.io.IOException; @@ -164,7 +164,7 @@ public Tuple next() throws IOException { } else { Text text = new Text(); reader.getCurrentValue(text); - cells = Bytes.splitPreserveAllTokens(text.getBytes(), delimiter, projectionMap); + cells = BytesUtils.splitPreserveAllTokens(text.getBytes(), delimiter, projectionMap); totalBytes += (long)text.getBytes().length; tuple = new LazyTuple(schema, cells, 0, nullChars, serde); } diff --git a/tajo-storage/src/main/java/org/apache/tajo/storage/v2/CSVFileScanner.java b/tajo-storage/src/main/java/org/apache/tajo/storage/v2/CSVFileScanner.java index 19209bdb80..e15ca6e3c3 100644 --- a/tajo-storage/src/main/java/org/apache/tajo/storage/v2/CSVFileScanner.java +++ b/tajo-storage/src/main/java/org/apache/tajo/storage/v2/CSVFileScanner.java @@ -31,7 +31,7 @@ import org.apache.tajo.storage.Tuple; import org.apache.tajo.storage.compress.CodecPool; import org.apache.tajo.storage.fragment.FileFragment; -import org.apache.tajo.util.Bytes; +import org.apache.tajo.util.BytesUtils; import java.io.DataInputStream; import java.io.IOException; @@ -221,10 +221,10 @@ private void page() throws IOException { if (prevTailLen == 0) { tail = new byte[0]; - tuples = 
Bytes.splitPreserveAllTokens(buf, rbyte, (char) LF); + tuples = BytesUtils.splitPreserveAllTokens(buf, rbyte, (char) LF); } else { byte[] lastRow = ArrayUtils.addAll(tail, buf); - tuples = Bytes.splitPreserveAllTokens(lastRow, rbyte + tail.length, (char) LF); + tuples = BytesUtils.splitPreserveAllTokens(lastRow, rbyte + tail.length, (char) LF); tail = null; } @@ -294,7 +294,7 @@ protected Tuple nextTuple() throws IOException { offset = this.tupleOffsets[currentIdx]; } - byte[][] cells = Bytes.splitPreserveAllTokens(tuples[currentIdx++], delimiter, targetColumnIndexes); + byte[][] cells = BytesUtils.splitPreserveAllTokens(tuples[currentIdx++], delimiter, targetColumnIndexes); return new LazyTuple(schema, cells, offset); } catch (Throwable t) { LOG.error(t.getMessage(), t); diff --git a/tajo-storage/src/main/java/org/apache/tajo/storage/v2/RCFile.java b/tajo-storage/src/main/java/org/apache/tajo/storage/v2/RCFile.java index 47dce74d2e..ac585983a3 100644 --- a/tajo-storage/src/main/java/org/apache/tajo/storage/v2/RCFile.java +++ b/tajo-storage/src/main/java/org/apache/tajo/storage/v2/RCFile.java @@ -29,8 +29,8 @@ import org.apache.hadoop.util.Progressable; import org.apache.hadoop.util.ReflectionUtils; import org.apache.tajo.conf.TajoConf; +import org.apache.tajo.storage.StorageUtil; import org.apache.tajo.storage.rcfile.*; -import org.apache.tajo.util.Bytes; import java.io.*; import java.rmi.server.UID; @@ -485,7 +485,7 @@ public void readFields(DataInput in) throws IOException { } if (skipTotal != 0) { - Bytes.skipFully(in, skipTotal); + StorageUtil.skipFully(in, skipTotal); skipTotal = 0; } @@ -512,7 +512,7 @@ public void readFields(DataInput in) throws IOException { } if (skipTotal != 0) { - Bytes.skipFully(in, skipTotal); + StorageUtil.skipFully(in, skipTotal); } } diff --git a/tajo-storage/src/test/java/org/apache/tajo/storage/TestLazyTuple.java b/tajo-storage/src/test/java/org/apache/tajo/storage/TestLazyTuple.java index cb2f7a6868..c6149f72df 100644 --- 
a/tajo-storage/src/test/java/org/apache/tajo/storage/TestLazyTuple.java +++ b/tajo-storage/src/test/java/org/apache/tajo/storage/TestLazyTuple.java @@ -23,7 +23,7 @@ import org.apache.tajo.common.TajoDataTypes; import org.apache.tajo.datum.DatumFactory; import org.apache.tajo.datum.NullDatum; -import org.apache.tajo.util.Bytes; +import org.apache.tajo.util.BytesUtils; import org.junit.Before; import org.junit.Test; @@ -69,7 +69,7 @@ public void setUp() { sb.append(DatumFactory.createInet4("192.168.0.1")).append('|'); sb.append(new String(nullbytes)).append('|'); sb.append(NullDatum.get()); - textRow = Bytes.splitPreserveAllTokens(sb.toString().getBytes(), '|'); + textRow = BytesUtils.splitPreserveAllTokens(sb.toString().getBytes(), '|'); serde = new TextSerializerDeserializer(); } @@ -220,7 +220,7 @@ public void testPutTuple() { @Test public void testInvalidNumber() { - byte[][] bytes = Bytes.splitPreserveAllTokens(" 1| |2 ||".getBytes(), '|'); + byte[][] bytes = BytesUtils.splitPreserveAllTokens(" 1| |2 ||".getBytes(), '|'); Schema schema = new Schema(); schema.addColumn("col1", TajoDataTypes.Type.INT2); schema.addColumn("col2", TajoDataTypes.Type.INT4); From 89287cb4b69354cbab1ffa4485c58684c384c982 Mon Sep 17 00:00:00 2001 From: Hyunsik Choi Date: Mon, 21 Jul 2014 17:13:36 +0900 Subject: [PATCH 16/21] initial work. 
--- .../planner/RangePartitionAlgorithm.java | 48 +++++++++++++++++-- 1 file changed, 43 insertions(+), 5 deletions(-) diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/RangePartitionAlgorithm.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/RangePartitionAlgorithm.java index 0aa6f975a0..6c0b3846e9 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/RangePartitionAlgorithm.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/RangePartitionAlgorithm.java @@ -25,8 +25,10 @@ import org.apache.tajo.datum.NullDatum; import org.apache.tajo.storage.Tuple; import org.apache.tajo.storage.TupleRange; +import org.apache.tajo.util.Bytes; import java.math.BigDecimal; +import java.math.BigInteger; public abstract class RangePartitionAlgorithm { protected SortSpec [] sortSpecs; @@ -113,15 +115,51 @@ public static BigDecimal computeCardinality(DataType dataType, Datum start, Datu columnCard = new BigDecimal(start.asInt8() - end.asInt8()); } break; - case TEXT: - final char textStart = (start instanceof NullDatum || start.size() == 0) ? '0' : start.asChars().charAt(0); - final char textEnd = (end instanceof NullDatum || end.size() == 0) ? 
'0' : end.asChars().charAt(0); + case TEXT: { + byte [] aPadded; + byte [] bPadded; + byte [] a; + byte [] b; if (isAscending) { - columnCard = new BigDecimal(textEnd - textStart); + a = start.asByteArray(); + b = end.asByteArray(); } else { - columnCard = new BigDecimal(textStart - textEnd); + b = start.asByteArray(); + a = end.asByteArray(); } + + if (start.asByteArray().length < b.length) { + aPadded = Bytes.padTail(a, b.length - a.length); + bPadded = b; + } else if (b.length < a.length) { + aPadded = a; + bPadded = Bytes.padTail(b, a.length - b.length); + } else { + aPadded = a; + bPadded = b; + } + +// if (Bytes.compareTo(aPadded, bPadded) >= 0) { +// throw new IllegalArgumentException("end <= begin"); +// } + byte [] prependHeader = {1, 0}; + final BigInteger startBI = new BigInteger(Bytes.add(prependHeader, aPadded)); + final BigInteger stopBI = new BigInteger(Bytes.add(prependHeader, bPadded)); + BigInteger diffBI = stopBI.subtract(startBI); + if (inclusive) { + diffBI = diffBI.add(BigInteger.ONE); + } + columnCard = new BigDecimal(diffBI); + +// final char textStart = (start instanceof NullDatum || start.size() == 0) ? '0' : start.asChars().charAt(0); +// final char textEnd = (end instanceof NullDatum || end.size() == 0) ? '0' : end.asChars().charAt(0); +// if (isAscending) { +// columnCard = new BigDecimal(textEnd - textStart); +// } else { +// columnCard = new BigDecimal(textStart - textEnd); +// } break; + } case DATE: if (isAscending) { columnCard = new BigDecimal(end.asInt4() - start.asInt4()); From 9602e9f5036eb078751dc786c0e64ca256d60d6f Mon Sep 17 00:00:00 2001 From: Hyunsik Choi Date: Tue, 22 Jul 2014 14:38:00 +0900 Subject: [PATCH 17/21] Changed data type to BigInteger from BigDecimal. 
--- .../java/org/apache/tajo/util/BytesUtils.java | 17 +++ .../planner/RangePartitionAlgorithm.java | 100 ++++++--------- .../engine/planner/UniformRangePartition.java | 114 +++++++++++------- .../planner/TestUniformRangePartition.java | 35 ++++++ 4 files changed, 159 insertions(+), 107 deletions(-) diff --git a/tajo-common/src/main/java/org/apache/tajo/util/BytesUtils.java b/tajo-common/src/main/java/org/apache/tajo/util/BytesUtils.java index 5f309c2f22..8c10f59431 100644 --- a/tajo-common/src/main/java/org/apache/tajo/util/BytesUtils.java +++ b/tajo-common/src/main/java/org/apache/tajo/util/BytesUtils.java @@ -179,4 +179,21 @@ private static byte[][] splitWorker(byte[] str, int offset, int length, char sep } return (byte[][]) list.toArray(new byte[list.size()][]); } + + public static Pair padBytes(byte [] a, byte [] b) { + byte [] aPadded; + byte [] bPadded; + + if (a.length < b.length) { + aPadded = Bytes.padTail(a, b.length - a.length); + bPadded = b; + } else if (b.length < a.length) { + aPadded = a; + bPadded = Bytes.padTail(b, a.length - b.length); + } else { + aPadded = a; + bPadded = b; + } + return new Pair(aPadded, bPadded); + } } diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/RangePartitionAlgorithm.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/RangePartitionAlgorithm.java index 6c0b3846e9..bc285e8f97 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/RangePartitionAlgorithm.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/RangePartitionAlgorithm.java @@ -26,6 +26,8 @@ import org.apache.tajo.storage.Tuple; import org.apache.tajo.storage.TupleRange; import org.apache.tajo.util.Bytes; +import org.apache.tajo.util.BytesUtils; +import org.apache.tajo.util.Pair; import java.math.BigDecimal; import java.math.BigInteger; @@ -33,7 +35,7 @@ public abstract class RangePartitionAlgorithm { protected SortSpec [] sortSpecs; protected TupleRange range; - protected final BigDecimal totalCard; + 
protected final BigInteger totalCard; /** true if the end of the range is inclusive. Otherwise, it should be false. */ protected final boolean inclusive; @@ -58,61 +60,63 @@ public RangePartitionAlgorithm(SortSpec [] sortSpecs, TupleRange totalRange, boo * @param end * @return */ - public static BigDecimal computeCardinality(DataType dataType, Datum start, Datum end, + public static BigInteger computeCardinality(DataType dataType, Datum start, Datum end, boolean inclusive, boolean isAscending) { - BigDecimal columnCard; + BigInteger columnCard; switch (dataType.getType()) { case BOOLEAN: - columnCard = new BigDecimal(2); + columnCard = BigInteger.valueOf(2); break; case CHAR: if (isAscending) { - columnCard = new BigDecimal(end.asChar() - start.asChar()); + columnCard = BigInteger.valueOf((int)end.asChar() - (int)start.asChar()); } else { - columnCard = new BigDecimal(start.asChar() - end.asChar()); + columnCard = BigInteger.valueOf(start.asChar() - end.asChar()); } break; case BIT: if (isAscending) { - columnCard = new BigDecimal(end.asByte() - start.asByte()); + columnCard = BigInteger.valueOf(end.asByte() - start.asByte()); } else { - columnCard = new BigDecimal(start.asByte() - end.asByte()); + columnCard = BigInteger.valueOf(start.asByte() - end.asByte()); } break; case INT2: if (isAscending) { - columnCard = new BigDecimal(end.asInt2() - start.asInt2()); + columnCard = BigInteger.valueOf(end.asInt2() - start.asInt2()); } else { - columnCard = new BigDecimal(start.asInt2() - end.asInt2()); + columnCard = BigInteger.valueOf(start.asInt2() - end.asInt2()); } break; case INT4: if (isAscending) { - columnCard = new BigDecimal(end.asInt4() - start.asInt4()); + columnCard = BigInteger.valueOf(end.asInt4() - start.asInt4()); } else { - columnCard = new BigDecimal(start.asInt4() - end.asInt4()); + columnCard = BigInteger.valueOf(start.asInt4() - end.asInt4()); } break; - case INT8: + case INT8: + case TIME: + case TIMESTAMP: if (isAscending) { - columnCard = new 
BigDecimal(end.asInt8() - start.asInt8()); + columnCard = BigInteger.valueOf(end.asInt8() - start.asInt8()); } else { - columnCard = new BigDecimal(start.asInt8() - end.asInt8()); + columnCard = BigInteger.valueOf(start.asInt8() - end.asInt8()); } break; case FLOAT4: if (isAscending) { - columnCard = new BigDecimal(end.asInt4() - start.asInt4()); + columnCard = BigInteger.valueOf(end.asInt4() - start.asInt4()); } else { - columnCard = new BigDecimal(start.asInt4() - end.asInt4()); + columnCard = BigInteger.valueOf(start.asInt4() - end.asInt4()); } break; case FLOAT8: if (isAscending) { - columnCard = new BigDecimal(end.asInt8() - start.asInt8()); + columnCard = BigInteger.valueOf(end.asInt8() - start.asInt8()); } else { - columnCard = new BigDecimal(start.asInt8() - end.asInt8()); + columnCard = BigInteger.valueOf(start.asInt8() - end.asInt8()); } break; case TEXT: { @@ -128,83 +132,53 @@ public static BigDecimal computeCardinality(DataType dataType, Datum start, Datu a = end.asByteArray(); } - if (start.asByteArray().length < b.length) { - aPadded = Bytes.padTail(a, b.length - a.length); - bPadded = b; - } else if (b.length < a.length) { - aPadded = a; - bPadded = Bytes.padTail(b, a.length - b.length); - } else { - aPadded = a; - bPadded = b; - } + Pair paddedPair = BytesUtils.padBytes(a, b); + aPadded = paddedPair.getFirst(); + bPadded = paddedPair.getSecond(); -// if (Bytes.compareTo(aPadded, bPadded) >= 0) { -// throw new IllegalArgumentException("end <= begin"); -// } byte [] prependHeader = {1, 0}; final BigInteger startBI = new BigInteger(Bytes.add(prependHeader, aPadded)); final BigInteger stopBI = new BigInteger(Bytes.add(prependHeader, bPadded)); BigInteger diffBI = stopBI.subtract(startBI); - if (inclusive) { - diffBI = diffBI.add(BigInteger.ONE); - } - columnCard = new BigDecimal(diffBI); - -// final char textStart = (start instanceof NullDatum || start.size() == 0) ? 
'0' : start.asChars().charAt(0); -// final char textEnd = (end instanceof NullDatum || end.size() == 0) ? '0' : end.asChars().charAt(0); -// if (isAscending) { -// columnCard = new BigDecimal(textEnd - textStart); -// } else { -// columnCard = new BigDecimal(textStart - textEnd); -// } + columnCard = diffBI; break; } case DATE: if (isAscending) { - columnCard = new BigDecimal(end.asInt4() - start.asInt4()); - } else { - columnCard = new BigDecimal(start.asInt4() - end.asInt4()); - } - break; - case TIME: - case TIMESTAMP: - if (isAscending) { - columnCard = new BigDecimal(end.asInt8() - start.asInt8()); + columnCard = BigInteger.valueOf(end.asInt4() - start.asInt4()); } else { - columnCard = new BigDecimal(start.asInt8() - end.asInt8()); + columnCard = BigInteger.valueOf(start.asInt4() - end.asInt4()); } break; case INET4: if (isAscending) { - columnCard = new BigDecimal(end.asInt4() - start.asInt4()); + columnCard = BigInteger.valueOf(end.asInt4() - start.asInt4()); } else { - columnCard = new BigDecimal(start.asInt4() - end.asInt4()); + columnCard = BigInteger.valueOf(start.asInt4() - end.asInt4()); } break; default: throw new UnsupportedOperationException(dataType + " is not supported yet"); } - return inclusive ? columnCard.add(new BigDecimal(1)).abs() : columnCard.abs(); + return inclusive ? columnCard.add(BigInteger.valueOf(1)).abs() : columnCard.abs(); } /** * It computes the value cardinality of a tuple range. 
* @return */ - public static BigDecimal computeCardinalityForAllColumns(SortSpec[] sortSpecs, TupleRange range, boolean inclusive) { + public static BigInteger computeCardinalityForAllColumns(SortSpec[] sortSpecs, TupleRange range, boolean inclusive) { Tuple start = range.getStart(); Tuple end = range.getEnd(); - Column col; - BigDecimal cardinality = new BigDecimal(1); - BigDecimal columnCard; + BigInteger cardinality = BigInteger.ONE; + BigInteger columnCard; for (int i = 0; i < sortSpecs.length; i++) { columnCard = computeCardinality(sortSpecs[i].getSortKey().getDataType(), start.get(i), end.get(i), inclusive, sortSpecs[i].isAscending()); - if (new BigDecimal(0).compareTo(columnCard) < 0) { + if (BigInteger.ZERO.compareTo(columnCard) < 0) { cardinality = cardinality.multiply(columnCard); } } @@ -212,7 +186,7 @@ public static BigDecimal computeCardinalityForAllColumns(SortSpec[] sortSpecs, T return cardinality; } - public BigDecimal getTotalCardinality() { + public BigInteger getTotalCardinality() { return totalCard; } diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/UniformRangePartition.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/UniformRangePartition.java index 88cb061498..67bed98d00 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/UniformRangePartition.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/UniformRangePartition.java @@ -20,26 +20,29 @@ import com.google.common.base.Preconditions; import com.google.common.collect.Lists; +import com.google.common.primitives.UnsignedInteger; import org.apache.tajo.catalog.Column; import org.apache.tajo.catalog.SortSpec; import org.apache.tajo.datum.Datum; import org.apache.tajo.datum.DatumFactory; -import org.apache.tajo.datum.NullDatum; import org.apache.tajo.engine.exception.RangeOverflowException; import org.apache.tajo.storage.Tuple; import org.apache.tajo.storage.TupleRange; import org.apache.tajo.storage.VTuple; import 
org.apache.tajo.util.Bytes; +import org.apache.tajo.util.BytesUtils; +import org.apache.tajo.util.Pair; import java.math.BigDecimal; +import java.math.BigInteger; import java.math.RoundingMode; import java.util.List; public class UniformRangePartition extends RangePartitionAlgorithm { private int variableId; - private BigDecimal[] cardForEachDigit; - private BigDecimal[] colCards; + private BigInteger[] cardForEachDigit; + private BigInteger[] colCards; /** * @@ -49,13 +52,13 @@ public class UniformRangePartition extends RangePartitionAlgorithm { */ public UniformRangePartition(TupleRange totalRange, SortSpec[] sortSpecs, boolean inclusive) { super(sortSpecs, totalRange, inclusive); - colCards = new BigDecimal[sortSpecs.length]; + colCards = new BigInteger[sortSpecs.length]; for (int i = 0; i < sortSpecs.length; i++) { colCards[i] = computeCardinality(sortSpecs[i].getSortKey().getDataType(), totalRange.getStart().get(i), totalRange.getEnd().get(i), inclusive, sortSpecs[i].isAscending()); } - cardForEachDigit = new BigDecimal[colCards.length]; + cardForEachDigit = new BigInteger[colCards.length]; for (int i = 0; i < colCards.length ; i++) { if (i == 0) { cardForEachDigit[i] = colCards[i]; @@ -74,17 +77,17 @@ public TupleRange[] partition(int partNum) { Preconditions.checkArgument(partNum > 0, "The number of partitions must be positive, but the given number: " + partNum); - Preconditions.checkArgument(totalCard.compareTo(new BigDecimal(partNum)) >= 0, + Preconditions.checkArgument(totalCard.compareTo(BigInteger.valueOf(partNum)) >= 0, "the number of partition cannot exceed total cardinality (" + totalCard + ")"); int varId; for (varId = 0; varId < cardForEachDigit.length; varId++) { - if (cardForEachDigit[varId].compareTo(new BigDecimal(partNum)) >= 0) + if (cardForEachDigit[varId].compareTo(BigInteger.valueOf(partNum)) >= 0) break; } this.variableId = varId; - BigDecimal [] reverseCardsForDigit = new BigDecimal[variableId+1]; + BigInteger [] reverseCardsForDigit = 
new BigInteger[variableId+1]; for (int i = variableId; i >= 0; i--) { if (i == variableId) { reverseCardsForDigit[i] = colCards[i]; @@ -94,11 +97,13 @@ public TupleRange[] partition(int partNum) { } List ranges = Lists.newArrayList(); - BigDecimal term = reverseCardsForDigit[0].divide( - new BigDecimal(partNum), RoundingMode.CEILING); - BigDecimal reminder = reverseCardsForDigit[0]; + + BigDecimal x = new BigDecimal(reverseCardsForDigit[0]); + + BigInteger term = x.divide(BigDecimal.valueOf(partNum), RoundingMode.CEILING).toBigInteger(); + BigInteger reminder = reverseCardsForDigit[0]; Tuple last = range.getStart(); - while(reminder.compareTo(new BigDecimal(0)) > 0) { + while(reminder.compareTo(BigInteger.ZERO) > 0) { if (reminder.compareTo(term) <= 0) { // final one is inclusive ranges.add(new TupleRange(sortSpecs, last, range.getEnd())); } else { @@ -121,46 +126,47 @@ public TupleRange[] partition(int partNum) { * @param sortSpecs * @return */ - public boolean isOverflow(int colId, Datum last, BigDecimal inc, SortSpec [] sortSpecs) { + public boolean isOverflow(int colId, Datum last, BigInteger inc, SortSpec [] sortSpecs) { Column column = sortSpecs[colId].getSortKey(); + BigDecimal incDecimal = new BigDecimal(inc); BigDecimal candidate; boolean overflow = false; switch (column.getDataType().getType()) { case BIT: { if (sortSpecs[colId].isAscending()) { - candidate = inc.add(new BigDecimal(last.asByte())); + candidate = incDecimal.add(new BigDecimal(last.asByte())); return new BigDecimal(range.getEnd().get(colId).asByte()).compareTo(candidate) < 0; } else { - candidate = new BigDecimal(last.asByte()).subtract(inc); + candidate = new BigDecimal(last.asByte()).subtract(incDecimal); return candidate.compareTo(new BigDecimal(range.getEnd().get(colId).asByte())) < 0; } } case CHAR: { if (sortSpecs[colId].isAscending()) { - candidate = inc.add(new BigDecimal((int)last.asChar())); + candidate = incDecimal.add(new BigDecimal((int)last.asChar())); return new 
BigDecimal((int)range.getEnd().get(colId).asChar()).compareTo(candidate) < 0; } else { - candidate = new BigDecimal((int)last.asChar()).subtract(inc); + candidate = new BigDecimal((int)last.asChar()).subtract(incDecimal); return candidate.compareTo(new BigDecimal((int)range.getEnd().get(colId).asChar())) < 0; } } case INT2: { if (sortSpecs[colId].isAscending()) { - candidate = inc.add(new BigDecimal(last.asInt2())); + candidate = incDecimal.add(new BigDecimal(last.asInt2())); return new BigDecimal(range.getEnd().get(colId).asInt2()).compareTo(candidate) < 0; } else { - candidate = new BigDecimal(last.asInt2()).subtract(inc); + candidate = new BigDecimal(last.asInt2()).subtract(incDecimal); return candidate.compareTo(new BigDecimal(range.getEnd().get(colId).asInt2())) < 0; } } case DATE: case INT4: { if (sortSpecs[colId].isAscending()) { - candidate = inc.add(new BigDecimal(last.asInt4())); + candidate = incDecimal.add(new BigDecimal(last.asInt4())); return new BigDecimal(range.getEnd().get(colId).asInt4()).compareTo(candidate) < 0; } else { - candidate = new BigDecimal(last.asInt4()).subtract(inc); + candidate = new BigDecimal(last.asInt4()).subtract(incDecimal); return candidate.compareTo(new BigDecimal(range.getEnd().get(colId).asInt4())) < 0; } } @@ -168,54 +174,67 @@ public boolean isOverflow(int colId, Datum last, BigDecimal inc, SortSpec [] sor case TIMESTAMP: case INT8: { if (sortSpecs[colId].isAscending()) { - candidate = inc.add(new BigDecimal(last.asInt8())); + candidate = incDecimal.add(new BigDecimal(last.asInt8())); return new BigDecimal(range.getEnd().get(colId).asInt8()).compareTo(candidate) < 0; } else { - candidate = new BigDecimal(last.asInt8()).subtract(inc); + candidate = new BigDecimal(last.asInt8()).subtract(incDecimal); return candidate.compareTo(new BigDecimal(range.getEnd().get(colId).asInt8())) < 0; } } case FLOAT4: { if (sortSpecs[colId].isAscending()) { - candidate = inc.add(new BigDecimal(last.asFloat4())); + candidate = 
incDecimal.add(new BigDecimal(last.asFloat4())); return new BigDecimal(range.getEnd().get(colId).asFloat4()).compareTo(candidate) < 0; } else { - candidate = new BigDecimal(last.asFloat4()).subtract(inc); + candidate = new BigDecimal(last.asFloat4()).subtract(incDecimal); return candidate.compareTo(new BigDecimal(range.getEnd().get(colId).asFloat4())) < 0; } } case FLOAT8: { if (sortSpecs[colId].isAscending()) { - candidate = inc.add(new BigDecimal(last.asFloat8())); + candidate = incDecimal.add(new BigDecimal(last.asFloat8())); return new BigDecimal(range.getEnd().get(colId).asFloat8()).compareTo(candidate) < 0; } else { - candidate = new BigDecimal(last.asFloat8()).subtract(inc); + candidate = new BigDecimal(last.asFloat8()).subtract(incDecimal); return candidate.compareTo(new BigDecimal(range.getEnd().get(colId).asFloat8())) < 0; } } case TEXT: { + byte [] lastBytesPadded; + byte [] endBytesPadded; + + byte [] lastBytes = last.asByteArray(); + byte [] endBytes = range.getEnd().getBytes(colId); + + Pair paddedPair = BytesUtils.padBytes(lastBytes, endBytes); + lastBytesPadded = paddedPair.getFirst(); + endBytesPadded = paddedPair.getSecond(); + + UnsignedInteger lastUInt = UnsignedInteger.valueOf(new BigInteger(lastBytesPadded)); + UnsignedInteger endUInt = UnsignedInteger.valueOf(new BigInteger(endBytesPadded)); + if (sortSpecs[colId].isAscending()) { - candidate = inc.add(new BigDecimal((int)(last instanceof NullDatum ? 
'0' : last.asChars().charAt(0)))); - return new BigDecimal(range.getEnd().get(colId).asChars().charAt(0)).compareTo(candidate) < 0; + candidate = incDecimal.add(new BigDecimal(new BigInteger(lastBytesPadded))); + return new BigDecimal(new BigInteger(endBytesPadded)).compareTo(candidate) < 0; } else { - candidate = new BigDecimal((int)(last.asChars().charAt(0))).subtract(inc); - return candidate.compareTo(new BigDecimal(range.getEnd().get(colId).asChars().charAt(0))) < 0; + candidate = new BigDecimal(new BigInteger(lastBytesPadded)).subtract(incDecimal); + return candidate.compareTo(new BigDecimal(new BigInteger(endBytesPadded))) < 0; } } case INET4: { int candidateIntVal; byte[] candidateBytesVal = new byte[4]; if (sortSpecs[colId].isAscending()) { - candidateIntVal = inc.intValue() + last.asInt4(); - if (candidateIntVal - inc.intValue() != last.asInt4()) { + candidateIntVal = incDecimal.intValue() + last.asInt4(); + if (candidateIntVal - incDecimal.intValue() != last.asInt4()) { return true; } Bytes.putInt(candidateBytesVal, 0, candidateIntVal); return Bytes.compareTo(range.getEnd().get(colId).asByteArray(), candidateBytesVal) < 0; } else { - candidateIntVal = last.asInt4() - inc.intValue(); - if (candidateIntVal + inc.intValue() != last.asInt4()) { + candidateIntVal = last.asInt4() - incDecimal.intValue(); + if (candidateIntVal + incDecimal.intValue() != last.asInt4()) { return true; } Bytes.putInt(candidateBytesVal, 0, candidateIntVal); @@ -289,12 +308,12 @@ public long incrementAndGetReminder(int colId, Datum last, long inc) { * @return */ public Tuple increment(final Tuple last, final long inc, final int baseDigit) { - BigDecimal [] incs = new BigDecimal[last.size()]; + BigInteger [] incs = new BigInteger[last.size()]; boolean [] overflowFlag = new boolean[last.size()]; - BigDecimal [] result; - BigDecimal value = new BigDecimal(inc); + BigInteger [] result; + BigInteger value = BigInteger.valueOf(inc); - BigDecimal [] reverseCardsForDigit = new 
BigDecimal[baseDigit + 1]; + BigInteger [] reverseCardsForDigit = new BigInteger[baseDigit + 1]; for (int i = baseDigit; i >= 0; i--) { if (i == baseDigit) { reverseCardsForDigit[i] = colCards[i]; @@ -316,8 +335,8 @@ public Tuple increment(final Tuple last, final long inc, final int baseDigit) { throw new RangeOverflowException(range, last, incs[i].longValue()); } long rem = incrementAndGetReminder(i, last.get(i), value.longValue()); - incs[i] = new BigDecimal(rem); - incs[i - 1] = incs[i-1].add(new BigDecimal(1)); + incs[i] = BigInteger.valueOf(rem); + incs[i - 1] = incs[i-1].add(BigInteger.ONE); overflowFlag[i] = true; } else { if (i > 0) { @@ -329,7 +348,7 @@ public Tuple increment(final Tuple last, final long inc, final int baseDigit) { for (int i = 0; i < incs.length; i++) { if (incs[i] == null) { - incs[i] = new BigDecimal(0); + incs[i] = BigInteger.ZERO; } } @@ -402,8 +421,15 @@ public Tuple increment(final Tuple last, final long inc, final int baseDigit) { end.put(i, DatumFactory.createText(((char) (range.getStart().get(i).asChars().charAt(0) + incs[i].longValue())) + "")); } else { - end.put(i, DatumFactory.createText( - ((char) ((last.get(i) instanceof NullDatum ? 
'0': last.get(i).asChars().charAt(0)) + incs[i].longValue())) + "")); + byte [] incBytes = incs[i].toByteArray(); + byte [] lastBytes = last.getBytes(i); + + Pair paddedPair = BytesUtils.padBytes(incBytes, lastBytes); + + UnsignedInteger incBigInt = UnsignedInteger.valueOf(new BigInteger(paddedPair.getFirst())); + UnsignedInteger lastBigInt = UnsignedInteger.valueOf(new BigInteger(paddedPair.getSecond())); + + end.put(i, DatumFactory.createText(incBigInt.add(lastBigInt).bigIntegerValue().toByteArray())); } break; case DATE: diff --git a/tajo-core/src/test/java/org/apache/tajo/engine/planner/TestUniformRangePartition.java b/tajo-core/src/test/java/org/apache/tajo/engine/planner/TestUniformRangePartition.java index f4c114f63b..d191c3bf4d 100644 --- a/tajo-core/src/test/java/org/apache/tajo/engine/planner/TestUniformRangePartition.java +++ b/tajo-core/src/test/java/org/apache/tajo/engine/planner/TestUniformRangePartition.java @@ -18,6 +18,8 @@ package org.apache.tajo.engine.planner; +import com.google.common.primitives.UnsignedBytes; +import com.google.common.primitives.UnsignedInts; import org.apache.tajo.catalog.Schema; import org.apache.tajo.catalog.SortSpec; import org.apache.tajo.common.TajoDataTypes.Type; @@ -27,6 +29,9 @@ import org.apache.tajo.storage.VTuple; import org.junit.Test; +import java.math.BigDecimal; +import java.math.BigInteger; + import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; @@ -359,6 +364,36 @@ public void testPartitionForOnePartNumWithOneOfTheValueNull() { assertEquals(expected, ranges[0]); } + @Test + public void testPartitionForMultipleChars() { + Schema schema = new Schema() + .addColumn("KEY1", Type.TEXT); + + SortSpec [] sortSpecs = PlannerUtil.schemaToSortSpecs(schema); + + Tuple s = new VTuple(1); + s.put(0, DatumFactory.createText("AAA")); + Tuple e = new VTuple(1); + e.put(0, DatumFactory.createText("ZZZ")); + + TupleRange expected = new TupleRange(sortSpecs, s, e); + RangePartitionAlgorithm 
partitioner = + new UniformRangePartition(expected, sortSpecs, true); + TupleRange [] ranges = partitioner.partition(48); + + TupleRange prev = null; + for (TupleRange r : ranges) { + if (prev == null) { + prev = r; + } else { + assertTrue(prev.compareTo(r) < 0); + } + } + assertEquals(48, ranges.length); + assertTrue(ranges[0].getStart().equals(s)); + assertTrue(ranges[47].getEnd().equals(e)); + } + @Test public void testPartitionForOnePartNumWithBothValueNull() { Schema schema = new Schema() From f4a83c2b6fc71485d856680b5f62b7a5859ddb8b Mon Sep 17 00:00:00 2001 From: Hyunsik Choi Date: Tue, 22 Jul 2014 17:02:56 +0900 Subject: [PATCH 18/21] Improved logic. --- .../java/org/apache/tajo/util/BytesUtils.java | 34 +++++++++++-------- .../planner/RangePartitionAlgorithm.java | 6 ++-- .../engine/planner/UniformRangePartition.java | 33 ++++++++---------- .../master/querymaster/Repartitioner.java | 11 +++--- .../planner/TestUniformRangePartition.java | 32 +++++++++++++++++ 5 files changed, 77 insertions(+), 39 deletions(-) diff --git a/tajo-common/src/main/java/org/apache/tajo/util/BytesUtils.java b/tajo-common/src/main/java/org/apache/tajo/util/BytesUtils.java index 8c10f59431..9e0b24877f 100644 --- a/tajo-common/src/main/java/org/apache/tajo/util/BytesUtils.java +++ b/tajo-common/src/main/java/org/apache/tajo/util/BytesUtils.java @@ -180,20 +180,26 @@ private static byte[][] splitWorker(byte[] str, int offset, int length, char sep return (byte[][]) list.toArray(new byte[list.size()][]); } - public static Pair padBytes(byte [] a, byte [] b) { - byte [] aPadded; - byte [] bPadded; - - if (a.length < b.length) { - aPadded = Bytes.padTail(a, b.length - a.length); - bPadded = b; - } else if (b.length < a.length) { - aPadded = a; - bPadded = Bytes.padTail(b, a.length - b.length); - } else { - aPadded = a; - bPadded = b; + public static byte[][] padBytes(byte []...bytes) { + byte [][] padded = new byte[bytes.length][]; + + int maxLen = Integer.MIN_VALUE; + + for (int i = 0; i 
< bytes.length; i++) { + maxLen = Math.max(maxLen, bytes[i].length); } - return new Pair(aPadded, bPadded); + + for (int i = 0; i < bytes.length; i++) { + int padLen = maxLen - bytes[i].length; + if (padLen == 0) { + padded[i] = bytes[i]; + } else if (padLen > 0) { + padded[i] = Bytes.padTail(bytes[i], padLen); + } else { + throw new RuntimeException("maximum length: " + maxLen + ", bytes[" + i + "].length:" + bytes[i].length); + } + } + + return padded; } } diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/RangePartitionAlgorithm.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/RangePartitionAlgorithm.java index bc285e8f97..5c889d9eab 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/RangePartitionAlgorithm.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/RangePartitionAlgorithm.java @@ -132,9 +132,9 @@ public static BigInteger computeCardinality(DataType dataType, Datum start, Datu a = end.asByteArray(); } - Pair paddedPair = BytesUtils.padBytes(a, b); - aPadded = paddedPair.getFirst(); - bPadded = paddedPair.getSecond(); + byte [][] padded = BytesUtils.padBytes(a, b); + aPadded = padded[0]; + bPadded = padded[1]; byte [] prependHeader = {1, 0}; final BigInteger startBI = new BigInteger(Bytes.add(prependHeader, aPadded)); diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/UniformRangePartition.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/UniformRangePartition.java index 67bed98d00..edb24f7dfe 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/UniformRangePartition.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/UniformRangePartition.java @@ -207,12 +207,9 @@ public boolean isOverflow(int colId, Datum last, BigInteger inc, SortSpec [] sor byte [] lastBytes = last.asByteArray(); byte [] endBytes = range.getEnd().getBytes(colId); - Pair paddedPair = BytesUtils.padBytes(lastBytes, endBytes); - lastBytesPadded = paddedPair.getFirst(); 
- endBytesPadded = paddedPair.getSecond(); - - UnsignedInteger lastUInt = UnsignedInteger.valueOf(new BigInteger(lastBytesPadded)); - UnsignedInteger endUInt = UnsignedInteger.valueOf(new BigInteger(endBytesPadded)); + byte [][] padded = BytesUtils.padBytes(lastBytes, endBytes); + lastBytesPadded = padded[0]; + endBytesPadded = padded[1]; if (sortSpecs[colId].isAscending()) { candidate = incDecimal.add(new BigDecimal(new BigInteger(lastBytesPadded))); @@ -290,9 +287,16 @@ public long incrementAndGetReminder(int colId, Datum last, long inc) { break; } case TEXT: { - char candidate = ((char)(last.asChars().charAt(0) + inc)); - char end = range.getEnd().get(colId).asChars().charAt(0); - reminder = (char) (candidate - end); + byte [] lastBytes = last.asByteArray(); + byte [] endBytes = range.getEnd().get(colId).asByteArray(); + + byte [][] padded = BytesUtils.padBytes(lastBytes, endBytes); + BigInteger lastBInt = new BigInteger(padded[0]); + BigInteger endBInt = new BigInteger(padded[1]); + BigInteger incBInt = BigInteger.valueOf(inc); + + BigInteger candidate = lastBInt.add(incBInt); + reminder = candidate.subtract(endBInt).longValue(); break; } } @@ -421,15 +425,8 @@ public Tuple increment(final Tuple last, final long inc, final int baseDigit) { end.put(i, DatumFactory.createText(((char) (range.getStart().get(i).asChars().charAt(0) + incs[i].longValue())) + "")); } else { - byte [] incBytes = incs[i].toByteArray(); - byte [] lastBytes = last.getBytes(i); - - Pair paddedPair = BytesUtils.padBytes(incBytes, lastBytes); - - UnsignedInteger incBigInt = UnsignedInteger.valueOf(new BigInteger(paddedPair.getFirst())); - UnsignedInteger lastBigInt = UnsignedInteger.valueOf(new BigInteger(paddedPair.getSecond())); - - end.put(i, DatumFactory.createText(incBigInt.add(lastBigInt).bigIntegerValue().toByteArray())); + UnsignedInteger lastBigInt = UnsignedInteger.valueOf(new BigInteger(last.get(i).asByteArray())); + end.put(i, 
DatumFactory.createText(lastBigInt.add(UnsignedInteger.valueOf(inc)).bigIntegerValue().toByteArray())); } break; case DATE: diff --git a/tajo-core/src/main/java/org/apache/tajo/master/querymaster/Repartitioner.java b/tajo-core/src/main/java/org/apache/tajo/master/querymaster/Repartitioner.java index 055e9a2056..973a4df0fb 100644 --- a/tajo-core/src/main/java/org/apache/tajo/master/querymaster/Repartitioner.java +++ b/tajo-core/src/main/java/org/apache/tajo/master/querymaster/Repartitioner.java @@ -55,6 +55,7 @@ import java.io.IOException; import java.io.UnsupportedEncodingException; import java.math.BigDecimal; +import java.math.BigInteger; import java.net.URI; import java.util.*; import java.util.Map.Entry; @@ -567,6 +568,7 @@ public static void scheduleRangeShuffledFetches(TaskSchedulerContext schedulerCo // calculate the number of maximum query ranges TableStats totalStat = computeChildBlocksStats(subQuery.getContext(), masterPlan, subQuery.getId()); + System.out.println(totalStat); // If there is an empty table in inner join, it should return zero rows. if (totalStat.getNumBytes() == 0 && totalStat.getColumnStats().size() == 0 ) { @@ -574,12 +576,12 @@ public static void scheduleRangeShuffledFetches(TaskSchedulerContext schedulerCo } TupleRange mergedRange = TupleUtil.columnStatToRange(sortSpecs, sortSchema, totalStat.getColumnStats(), false); RangePartitionAlgorithm partitioner = new UniformRangePartition(mergedRange, sortSpecs); - BigDecimal card = partitioner.getTotalCardinality(); + BigInteger card = partitioner.getTotalCardinality(); // if the number of the range cardinality is less than the desired number of tasks, // we set the the number of tasks to the number of range cardinality. 
int determinedTaskNum; - if (card.compareTo(new BigDecimal(maxNum)) < 0) { + if (card.compareTo(BigInteger.valueOf(maxNum)) < 0) { LOG.info(subQuery.getId() + ", The range cardinality (" + card + ") is less then the desired number of tasks (" + maxNum + ")"); determinedTaskNum = card.intValue(); @@ -636,8 +638,9 @@ public static void scheduleRangeShuffledFetches(TaskSchedulerContext schedulerCo for (FetchImpl fetch: fetches) { String rangeParam = TupleUtil.rangeToQuery(ranges[i], ascendingFirstKey ? i == (ranges.length - 1) : i == 0, encoder); - fetch.setRangeParams(rangeParam); - fetchSet.add(fetch); + FetchImpl copy = new FetchImpl(fetch.getProto()); + copy.setRangeParams(rangeParam); + fetchSet.add(copy); } map.put(ranges[i], fetchSet); } diff --git a/tajo-core/src/test/java/org/apache/tajo/engine/planner/TestUniformRangePartition.java b/tajo-core/src/test/java/org/apache/tajo/engine/planner/TestUniformRangePartition.java index d191c3bf4d..b8b3f0e3f5 100644 --- a/tajo-core/src/test/java/org/apache/tajo/engine/planner/TestUniformRangePartition.java +++ b/tajo-core/src/test/java/org/apache/tajo/engine/planner/TestUniformRangePartition.java @@ -394,6 +394,38 @@ public void testPartitionForMultipleChars() { assertTrue(ranges[47].getEnd().equals(e)); } + @Test + public void testPartitionForMultipleCharsWithSameFirstChar() { + Schema schema = new Schema() + .addColumn("KEY1", Type.TEXT); + + SortSpec [] sortSpecs = PlannerUtil.schemaToSortSpecs(schema); + + Tuple s = new VTuple(1); + s.put(0, DatumFactory.createText("AAA")); + Tuple e = new VTuple(1); + e.put(0, DatumFactory.createText("AAZ")); + + final int partNum = 4; + + TupleRange expected = new TupleRange(sortSpecs, s, e); + RangePartitionAlgorithm partitioner = + new UniformRangePartition(expected, sortSpecs, true); + TupleRange [] ranges = partitioner.partition(partNum); + + TupleRange prev = null; + for (TupleRange r : ranges) { + if (prev == null) { + prev = r; + } else { + assertTrue(prev.compareTo(r) < 0); 
+ } + } + assertEquals(partNum, ranges.length); + assertTrue(ranges[0].getStart().equals(s)); + assertTrue(ranges[partNum - 1].getEnd().equals(e)); + } + @Test public void testPartitionForOnePartNumWithBothValueNull() { Schema schema = new Schema() From 983dd11365010a977df9a75e511d25ceabc5b9b2 Mon Sep 17 00:00:00 2001 From: Hyunsik Choi Date: Tue, 22 Jul 2014 18:11:27 +0900 Subject: [PATCH 19/21] Conflicts: tajo-common/src/main/java/org/apache/tajo/util/BytesUtils.java tajo-core/src/main/java/org/apache/tajo/master/querymaster/Repartitioner.java --- CHANGES | 7 + .../java/org/apache/tajo/catalog/Schema.java | 19 +- .../java/org/apache/tajo/util/BytesUtils.java | 1 + .../tajo/engine/planner/ExprAnnotator.java | 26 +- .../tajo/engine/planner/ExprNormalizer.java | 13 +- .../tajo/engine/planner/LogicalPlan.java | 256 ++------------- .../planner/LogicalPlanPreprocessor.java | 32 +- .../tajo/engine/planner/LogicalPlanner.java | 49 ++- .../engine/planner/UniformRangePartition.java | 6 +- .../planner/logical/TableSubQueryNode.java | 1 - .../planner/nameresolver/NameResolver.java | 291 ++++++++++++++++++ .../nameresolver/NameResolvingMode.java | 80 +++++ .../nameresolver/ResolverByLegacy.java | 126 ++++++++ .../planner/nameresolver/ResolverByRels.java | 38 +++ .../ResolverByRelsAndSubExprs.java | 42 +++ .../ResolverBySubExprsAndRels.java | 42 +++ .../tajo/master/querymaster/QueryUnit.java | 10 +- .../master/querymaster/Repartitioner.java | 8 +- .../org/apache/tajo/worker/FetchImpl.java | 24 +- .../tajo/engine/eval/TestEvalTreeUtil.java | 4 +- .../tajo/engine/query/TestCaseByCases.java | 7 + .../tajo/engine/query/TestSelectQuery.java | 27 ++ .../TestCaseByCases/testTAJO917Case1.sql | 13 + .../TestSelectQuery/testNonQualifiedNames.sql | 1 + ...stSelectColumnAliasExistingInRelation1.sql | 1 + ...stSelectColumnAliasExistingInRelation2.sql | 1 + .../TestCaseByCases/testTAJO917Case1.result | 7 + .../testNonQualifiedNames.result | 7 + ...electColumnAliasExistingInRelation1.result 
| 4 + ...electColumnAliasExistingInRelation2.result | 7 + .../org/apache/tajo/storage/RowStoreUtil.java | 40 ++- 31 files changed, 917 insertions(+), 273 deletions(-) create mode 100644 tajo-core/src/main/java/org/apache/tajo/engine/planner/nameresolver/NameResolver.java create mode 100644 tajo-core/src/main/java/org/apache/tajo/engine/planner/nameresolver/NameResolvingMode.java create mode 100644 tajo-core/src/main/java/org/apache/tajo/engine/planner/nameresolver/ResolverByLegacy.java create mode 100644 tajo-core/src/main/java/org/apache/tajo/engine/planner/nameresolver/ResolverByRels.java create mode 100644 tajo-core/src/main/java/org/apache/tajo/engine/planner/nameresolver/ResolverByRelsAndSubExprs.java create mode 100644 tajo-core/src/main/java/org/apache/tajo/engine/planner/nameresolver/ResolverBySubExprsAndRels.java create mode 100644 tajo-core/src/test/resources/queries/TestCaseByCases/testTAJO917Case1.sql create mode 100644 tajo-core/src/test/resources/queries/TestSelectQuery/testNonQualifiedNames.sql create mode 100644 tajo-core/src/test/resources/queries/TestSelectQuery/testSelectColumnAliasExistingInRelation1.sql create mode 100644 tajo-core/src/test/resources/queries/TestSelectQuery/testSelectColumnAliasExistingInRelation2.sql create mode 100644 tajo-core/src/test/resources/results/TestCaseByCases/testTAJO917Case1.result create mode 100644 tajo-core/src/test/resources/results/TestSelectQuery/testNonQualifiedNames.result create mode 100644 tajo-core/src/test/resources/results/TestSelectQuery/testSelectColumnAliasExistingInRelation1.result create mode 100644 tajo-core/src/test/resources/results/TestSelectQuery/testSelectColumnAliasExistingInRelation2.result diff --git a/CHANGES b/CHANGES index 40100428e2..8a1aae651b 100644 --- a/CHANGES +++ b/CHANGES @@ -97,6 +97,13 @@ Release 0.9.0 - unreleased BUG FIXES + TAJO-666: java.nio.BufferOverflowException occurs when the query includes an order by + clause on a TEXT column. 
(Mai Hai Thanh via jihoon) + + TAJO-939: Refactoring the column resolver in LogicalPlan. (hyunsik) + + TAJO-965: Upgrade Bytes class and move some methods to others. (hyunsik) + TAJO-961: TajoCli should exit when at least one query faces error while executing a SQL script. (hyunsik) diff --git a/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/Schema.java b/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/Schema.java index 35d2fe9225..3aee5d2e3d 100644 --- a/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/Schema.java +++ b/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/Schema.java @@ -95,11 +95,22 @@ private void init() { * @param qualifier The qualifier */ public void setQualifier(String qualifier) { + Schema copy = null; + try { + copy = (Schema) clone(); + } catch (CloneNotSupportedException e) { + throw new RuntimeException(e); + } + + fields.clear(); fieldsByQualifiedName.clear(); - for (int i = 0; i < size(); i++) { - Column column = fields.get(i); - fields.set(i, new Column(qualifier + "." + column.getSimpleName(), column.getDataType())); - fieldsByQualifiedName.put(fields.get(i).getQualifiedName(), i); + fieldsByName.clear(); + + Column newColumn; + for (int i = 0; i < copy.size(); i++) { + Column column = copy.getColumn(i); + newColumn = new Column(qualifier + "." 
+ column.getSimpleName(), column.getDataType()); + addColumn(newColumn); } } diff --git a/tajo-common/src/main/java/org/apache/tajo/util/BytesUtils.java b/tajo-common/src/main/java/org/apache/tajo/util/BytesUtils.java index 9e0b24877f..61d12c2a1b 100644 --- a/tajo-common/src/main/java/org/apache/tajo/util/BytesUtils.java +++ b/tajo-common/src/main/java/org/apache/tajo/util/BytesUtils.java @@ -180,6 +180,7 @@ private static byte[][] splitWorker(byte[] str, int offset, int length, char sep return (byte[][]) list.toArray(new byte[list.size()][]); } + public static byte[][] padBytes(byte []...bytes) { byte [][] padded = new byte[bytes.length][]; diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/ExprAnnotator.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/ExprAnnotator.java index 2c386b2f03..39fd08acf5 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/ExprAnnotator.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/ExprAnnotator.java @@ -32,6 +32,8 @@ import org.apache.tajo.engine.function.AggFunction; import org.apache.tajo.engine.function.GeneralFunction; import org.apache.tajo.engine.planner.logical.NodeType; +import org.apache.tajo.engine.planner.nameresolver.NameResolvingMode; +import org.apache.tajo.engine.planner.nameresolver.NameResolver; import org.apache.tajo.exception.InternalException; import org.apache.tajo.util.Pair; import org.apache.tajo.util.TUtil; @@ -66,16 +68,19 @@ public ExprAnnotator(CatalogService catalog) { static class Context { LogicalPlan plan; LogicalPlan.QueryBlock currentBlock; + NameResolvingMode columnRsvLevel; - public Context(LogicalPlan plan, LogicalPlan.QueryBlock block) { + public Context(LogicalPlan plan, LogicalPlan.QueryBlock block, NameResolvingMode colRsvLevel) { this.plan = plan; this.currentBlock = block; + this.columnRsvLevel = colRsvLevel; } } - public EvalNode createEvalNode(LogicalPlan plan, LogicalPlan.QueryBlock block, Expr expr) + public EvalNode 
createEvalNode(LogicalPlan plan, LogicalPlan.QueryBlock block, Expr expr, + NameResolvingMode colRsvLevel) throws PlanningException { - Context context = new Context(plan, block); + Context context = new Context(plan, block, colRsvLevel); return AlgebraicUtil.eliminateConstantExprs(visit(context, new Stack(), expr)); } @@ -540,7 +545,20 @@ public EvalNode visitSign(Context ctx, Stack stack, SignedExpr expr) throw @Override public EvalNode visitColumnReference(Context ctx, Stack stack, ColumnReferenceExpr expr) throws PlanningException { - Column column = ctx.plan.resolveColumn(ctx.currentBlock, expr); + Column column; + + switch (ctx.columnRsvLevel) { + case LEGACY: + column = ctx.plan.resolveColumn(ctx.currentBlock, expr); + break; + case RELS_ONLY: + case RELS_AND_SUBEXPRS: + case SUBEXPRS_AND_RELS: + column = NameResolver.resolve(ctx.plan, ctx.currentBlock, expr, ctx.columnRsvLevel); + break; + default: + throw new PlanningException("Unsupported column resolving level: " + ctx.columnRsvLevel.name()); + } return new FieldEval(column); } diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/ExprNormalizer.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/ExprNormalizer.java index 81bbd41247..5b61b74ec1 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/ExprNormalizer.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/ExprNormalizer.java @@ -18,16 +18,16 @@ package org.apache.tajo.engine.planner; -import com.google.common.collect.Sets; import com.google.common.collect.Sets; import org.apache.tajo.algebra.*; import org.apache.tajo.catalog.CatalogUtil; import org.apache.tajo.engine.exception.NoSuchColumnException; +import org.apache.tajo.engine.planner.nameresolver.NameResolvingMode; +import org.apache.tajo.engine.planner.nameresolver.NameResolver; import java.util.ArrayList; import java.util.List; import java.util.Set; -import java.util.Set; import java.util.Stack; /** @@ -330,11 +330,18 @@ public Expr 
visitCastExpr(ExprNormalizedResult ctx, Stack stack, CastExpr @Override public Expr visitColumnReference(ExprNormalizedResult ctx, Stack stack, ColumnReferenceExpr expr) throws PlanningException { + + if (ctx.block.isAliasedName(expr.getCanonicalName())) { + String originalName = ctx.block.getOriginalName(expr.getCanonicalName()); + expr.setName(originalName); + return expr; + } // if a column reference is not qualified, it finds and sets the qualified column name. if (!(expr.hasQualifier() && CatalogUtil.isFQTableName(expr.getQualifier()))) { if (!ctx.block.namedExprsMgr.contains(expr.getCanonicalName()) && expr.getType() == OpType.Column) { try { - String normalized = ctx.plan.getNormalizedColumnName(ctx.block, expr); + String normalized = + NameResolver.resolve(ctx.plan, ctx.block, expr, NameResolvingMode.LEGACY).getQualifiedName(); expr.setName(normalized); } catch (NoSuchColumnException nsc) { } diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/LogicalPlan.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/LogicalPlan.java index 4e1d3137d6..86bacefbd5 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/LogicalPlan.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/LogicalPlan.java @@ -19,23 +19,20 @@ package org.apache.tajo.engine.planner; import com.google.common.collect.Lists; -import com.google.common.collect.Sets; import org.apache.commons.lang.ObjectUtils; import org.apache.tajo.algebra.*; import org.apache.tajo.annotation.NotThreadSafe; -import org.apache.tajo.catalog.CatalogUtil; import org.apache.tajo.catalog.Column; import org.apache.tajo.catalog.Schema; import org.apache.tajo.engine.eval.EvalNode; -import org.apache.tajo.engine.exception.AmbiguousFieldException; -import org.apache.tajo.engine.exception.NoSuchColumnException; -import org.apache.tajo.engine.exception.VerifyException; import org.apache.tajo.engine.planner.graph.DirectedGraphCursor; import 
org.apache.tajo.engine.planner.graph.SimpleDirectedGraph; import org.apache.tajo.engine.planner.logical.LogicalNode; import org.apache.tajo.engine.planner.logical.LogicalRootNode; import org.apache.tajo.engine.planner.logical.NodeType; import org.apache.tajo.engine.planner.logical.RelationNode; +import org.apache.tajo.engine.planner.nameresolver.NameResolvingMode; +import org.apache.tajo.engine.planner.nameresolver.NameResolver; import org.apache.tajo.util.TUtil; import java.lang.reflect.Constructor; @@ -49,7 +46,6 @@ public class LogicalPlan { /** the prefix character for virtual tables */ public static final char VIRTUAL_TABLE_PREFIX='#'; public static final char NONAMED_COLUMN_PREFIX='?'; - public static final char NONAMED_WINDOW_PREFIX='^'; /** it indicates the root block */ public static final String ROOT_BLOCK = VIRTUAL_TABLE_PREFIX + "ROOT"; @@ -58,7 +54,6 @@ public class LogicalPlan { private int nextPid = 0; private Integer noNameBlockId = 0; private Integer noNameColumnId = 0; - private Integer noNameWindowId = 0; /** a map from between a block name to a block plan */ private Map queryBlocks = new LinkedHashMap(); @@ -266,230 +261,8 @@ public SimpleDirectedGraph getQueryBlockGraph() { return queryBlockGraph; } - public String getNormalizedColumnName(QueryBlock block, ColumnReferenceExpr columnRef) - throws PlanningException { - Column found = resolveColumn(block, columnRef); - if (found == null) { - throw new NoSuchColumnException(columnRef.getCanonicalName()); - } - return found.getQualifiedName(); - } - - public String resolveDatabase(QueryBlock block, String tableName) throws PlanningException { - List found = new ArrayList(); - for (RelationNode relation : block.getRelations()) { - // check alias name or table name - if (CatalogUtil.extractSimpleName(relation.getCanonicalName()).equals(tableName) || - CatalogUtil.extractSimpleName(relation.getTableName()).equals(tableName)) { - // obtain the database name - 
found.add(CatalogUtil.extractQualifier(relation.getTableName())); - } - } - - if (found.size() == 0) { - return null; - } else if (found.size() > 1) { - throw new PlanningException("Ambiguous table name \"" + tableName + "\""); - } - - return found.get(0); - } - - /** - * It resolves a column. - */ public Column resolveColumn(QueryBlock block, ColumnReferenceExpr columnRef) throws PlanningException { - if (columnRef.hasQualifier()) { - return resolveColumnWithQualifier(block, columnRef); - } else { - return resolveColumnWithoutQualifier(block, columnRef); - } - } - - private Column resolveColumnWithQualifier(QueryBlock block, ColumnReferenceExpr columnRef) throws PlanningException { - String qualifier; - String canonicalName; - String qualifiedName; - - if (CatalogUtil.isFQTableName(columnRef.getQualifier())) { - qualifier = columnRef.getQualifier(); - canonicalName = columnRef.getCanonicalName(); - } else { - String resolvedDatabaseName = resolveDatabase(block, columnRef.getQualifier()); - if (resolvedDatabaseName == null) { - throw new NoSuchColumnException(columnRef.getQualifier()); - } - qualifier = CatalogUtil.buildFQName(resolvedDatabaseName, columnRef.getQualifier()); - canonicalName = CatalogUtil.buildFQName(qualifier, columnRef.getName()); - } - qualifiedName = CatalogUtil.buildFQName(qualifier, columnRef.getName()); - - RelationNode relationOp = block.getRelation(qualifier); - - // if a column name is outside of this query block - if (relationOp == null) { - // TODO - nested query can only refer outer query block? or not? 
- for (QueryBlock eachBlock : queryBlocks.values()) { - if (eachBlock.existsRelation(qualifier)) { - relationOp = eachBlock.getRelation(qualifier); - } - } - } - - // If we cannot find any relation against a qualified column name - if (relationOp == null) { - throw new NoSuchColumnException(canonicalName); - } - - if (block.isAlreadyRenamedTableName(CatalogUtil.extractQualifier(canonicalName))) { - String changedName = CatalogUtil.buildFQName( - relationOp.getCanonicalName(), - CatalogUtil.extractSimpleName(canonicalName)); - canonicalName = changedName; - } - - Schema schema = relationOp.getTableSchema(); - Column column = schema.getColumn(canonicalName); - if (column == null) { - throw new NoSuchColumnException(canonicalName); - } - - // If code reach here, a column is found. - // But, it may be aliased from bottom logical node. - // If the column is aliased, the found name may not be used in upper node. - - // Here, we try to check if column reference is already aliased. - // If so, it replaces the name with aliased name. - LogicalNode currentNode = block.getCurrentNode(); - - // The condition (currentNode.getInSchema().contains(column)) means - // the column can be used at the current node. So, we don't need to find aliase name. 
- Schema currentNodeSchema = null; - if (currentNode != null) { - if (currentNode instanceof RelationNode) { - currentNodeSchema = ((RelationNode) currentNode).getTableSchema(); - } else { - currentNodeSchema = currentNode.getInSchema(); - } - } - - if (currentNode != null && !currentNodeSchema.contains(column) - && currentNode.getType() != NodeType.TABLE_SUBQUERY) { - List candidates = TUtil.newList(); - if (block.namedExprsMgr.isAliased(qualifiedName)) { - String alias = block.namedExprsMgr.getAlias(canonicalName); - Column found = resolveColumn(block, new ColumnReferenceExpr(alias)); - if (found != null) { - candidates.add(found); - } - } - if (!candidates.isEmpty()) { - return ensureUniqueColumn(candidates); - } - } - - return column; - } - - private Column resolveColumnWithoutQualifier(QueryBlock block, - ColumnReferenceExpr columnRef)throws PlanningException { - - List candidates = TUtil.newList(); - - // It tries to find a full qualified column name from all relations in the current block. - for (RelationNode rel : block.getRelations()) { - Column found = rel.getTableSchema().getColumn(columnRef.getName()); - if (found != null) { - candidates.add(found); - } - } - - if (!candidates.isEmpty()) { - return ensureUniqueColumn(candidates); - } - - // Trying to find the column within the current block - if (block.currentNode != null && block.currentNode.getInSchema() != null) { - Column found = block.currentNode.getInSchema().getColumn(columnRef.getCanonicalName()); - if (found != null) { - return found; - } - } - - if (block.getLatestNode() != null) { - Column found = block.getLatestNode().getOutSchema().getColumn(columnRef.getName()); - if (found != null) { - return found; - } - } - - - // Trying to find columns from aliased references. 
- if (block.namedExprsMgr.isAliased(columnRef.getCanonicalName())) { - String originalName = block.namedExprsMgr.getAlias(columnRef.getCanonicalName()); - Column found = resolveColumn(block, new ColumnReferenceExpr(originalName)); - if (found != null) { - candidates.add(found); - } - } - if (!candidates.isEmpty()) { - return ensureUniqueColumn(candidates); - } - - // This is an exception case. It means that there are some bugs in other parts. - LogicalNode blockRootNode = block.getRoot(); - if (blockRootNode != null && blockRootNode.getOutSchema().getColumn(columnRef.getCanonicalName()) != null) { - throw new NoSuchColumnException("ERROR: no such a column name "+ columnRef.getCanonicalName()); - } - - // Trying to find columns from other relations in other blocks - for (QueryBlock eachBlock : queryBlocks.values()) { - for (RelationNode rel : eachBlock.getRelations()) { - Column found = rel.getTableSchema().getColumn(columnRef.getName()); - if (found != null) { - candidates.add(found); - } - } - } - - if (!candidates.isEmpty()) { - return ensureUniqueColumn(candidates); - } - - // Trying to find columns from schema in current block. 
- if (block.getSchema() != null) { - Column found = block.getSchema().getColumn(columnRef.getName()); - if (found != null) { - candidates.add(found); - } - } - - if (!candidates.isEmpty()) { - return ensureUniqueColumn(candidates); - } - - throw new NoSuchColumnException("ERROR: no such a column name "+ columnRef.getCanonicalName()); - } - - private static Column ensureUniqueColumn(List candidates) - throws VerifyException { - if (candidates.size() == 1) { - return candidates.get(0); - } else if (candidates.size() > 2) { - StringBuilder sb = new StringBuilder(); - boolean first = true; - for (Column column : candidates) { - if (first) { - first = false; - } else { - sb.append(", "); - } - sb.append(column); - } - throw new AmbiguousFieldException("Ambiguous Column Name: " + sb.toString()); - } else { - return null; - } + return NameResolver.resolve(this, block, columnRef, NameResolvingMode.LEGACY); } public String getQueryGraphAsString() { @@ -601,7 +374,8 @@ public class QueryBlock { // transient states private final Map canonicalNameToRelationMap = TUtil.newHashMap(); - private final Map> aliasMap = TUtil.newHashMap(); + private final Map> relationAliasMap = TUtil.newHashMap(); + private final Map columnAliasMap = TUtil.newHashMap(); private final Map> operatorToExprMap = TUtil.newHashMap(); private final List relationList = TUtil.newList(); private boolean hasWindowFunction = false; @@ -669,7 +443,7 @@ public boolean existsRelation(String name) { } public boolean isAlreadyRenamedTableName(String name) { - return aliasMap.containsKey(name); + return relationAliasMap.containsKey(name); } public RelationNode getRelation(String name) { @@ -677,8 +451,8 @@ public RelationNode getRelation(String name) { return canonicalNameToRelationMap.get(name); } - if (aliasMap.containsKey(name)) { - return canonicalNameToRelationMap.get(aliasMap.get(name).get(0)); + if (relationAliasMap.containsKey(name)) { + return 
canonicalNameToRelationMap.get(relationAliasMap.get(name).get(0)); } return null; @@ -686,7 +460,7 @@ public RelationNode getRelation(String name) { public void addRelation(RelationNode relation) { if (relation.hasAlias()) { - TUtil.putToNestedList(aliasMap, relation.getTableName(), relation.getCanonicalName()); + TUtil.putToNestedList(relationAliasMap, relation.getTableName(), relation.getCanonicalName()); } canonicalNameToRelationMap.put(relation.getCanonicalName(), relation); relationList.add(relation); @@ -700,6 +474,18 @@ public boolean hasTableExpression() { return this.canonicalNameToRelationMap.size() > 0; } + public void addColumnAlias(String original, String alias) { + columnAliasMap.put(alias, original); + } + + public boolean isAliasedName(String alias) { + return columnAliasMap.containsKey(alias); + } + + public String getOriginalName(String alias) { + return columnAliasMap.get(alias); + } + public void setSchema(Schema schema) { this.schema = schema; } diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/LogicalPlanPreprocessor.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/LogicalPlanPreprocessor.java index 4f1218f2ba..84fe6c2fb9 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/LogicalPlanPreprocessor.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/LogicalPlanPreprocessor.java @@ -26,6 +26,8 @@ import org.apache.tajo.engine.exception.NoSuchColumnException; import org.apache.tajo.engine.planner.LogicalPlan.QueryBlock; import org.apache.tajo.engine.planner.logical.*; +import org.apache.tajo.engine.planner.nameresolver.NameResolvingMode; +import org.apache.tajo.engine.planner.nameresolver.NameResolver; import org.apache.tajo.engine.utils.SchemaUtil; import org.apache.tajo.master.session.Session; import org.apache.tajo.util.TUtil; @@ -196,7 +198,19 @@ public LogicalNode visitProjection(PreprocessContext ctx, Stack stack, Pro expr.setNamedExprs(rewrittenTargets.toArray(new 
NamedExpr[rewrittenTargets.size()])); } + // 1) Normalize field names into full qualified names + // 2) Register explicit column aliases to block NamedExpr[] projectTargetExprs = expr.getNamedExprs(); + NameRefInSelectListNormalizer normalizer = new NameRefInSelectListNormalizer(); + for (int i = 0; i < expr.getNamedExprs().length; i++) { + NamedExpr namedExpr = projectTargetExprs[i]; + normalizer.visit(ctx, new Stack(), namedExpr.getExpr()); + + if (namedExpr.getExpr().getType() == OpType.Column && namedExpr.hasAlias()) { + ctx.currentBlock.addColumnAlias(((ColumnReferenceExpr)namedExpr.getExpr()).getCanonicalName(), + namedExpr.getAlias()); + } + } Target [] targets; targets = new Target[projectTargetExprs.length]; @@ -217,6 +231,8 @@ public LogicalNode visitProjection(PreprocessContext ctx, Stack stack, Pro ProjectionNode projectionNode = ctx.plan.createNode(ProjectionNode.class); projectionNode.setInSchema(child.getOutSchema()); projectionNode.setOutSchema(PlannerUtil.targetToSchema(targets)); + + ctx.currentBlock.setSchema(projectionNode.getOutSchema()); return projectionNode; } @@ -267,7 +283,8 @@ public LogicalNode visitGroupBy(PreprocessContext ctx, Stack stack, Aggreg for (int i = 0; i < finalTargetNum; i++) { NamedExpr namedExpr = projection.getNamedExprs()[i]; - EvalNode evalNode = annotator.createEvalNode(ctx.plan, ctx.currentBlock, namedExpr.getExpr()); + EvalNode evalNode = annotator.createEvalNode(ctx.plan, ctx.currentBlock, namedExpr.getExpr(), + NameResolvingMode.SUBEXPRS_AND_RELS); if (namedExpr.hasAlias()) { targets[i] = new Target(evalNode, namedExpr.getAlias()); @@ -449,4 +466,17 @@ public LogicalNode visitInsert(PreprocessContext ctx, Stack stack, Insert insertNode.setOutSchema(child.getOutSchema()); return insertNode; } + + class NameRefInSelectListNormalizer extends SimpleAlgebraVisitor { + @Override + public Expr visitColumnReference(PreprocessContext ctx, Stack stack, ColumnReferenceExpr expr) + throws PlanningException { + + String 
normalized = NameResolver.resolve(ctx.plan, ctx.currentBlock, expr, + NameResolvingMode.RELS_ONLY).getQualifiedName(); + expr.setName(normalized); + + return expr; + } + } } diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/LogicalPlanner.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/LogicalPlanner.java index ea517c012c..a4820cb46e 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/LogicalPlanner.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/LogicalPlanner.java @@ -40,6 +40,7 @@ import org.apache.tajo.engine.exception.VerifyException; import org.apache.tajo.engine.planner.LogicalPlan.QueryBlock; import org.apache.tajo.engine.planner.logical.*; +import org.apache.tajo.engine.planner.nameresolver.NameResolvingMode; import org.apache.tajo.engine.planner.rewrite.ProjectionPushDownRule; import org.apache.tajo.engine.utils.SchemaUtil; import org.apache.tajo.master.session.Session; @@ -51,6 +52,7 @@ import java.util.*; import static org.apache.tajo.algebra.CreateTable.PartitionType; + import static org.apache.tajo.engine.planner.ExprNormalizer.ExprNormalizedResult; import static org.apache.tajo.engine.planner.LogicalPlan.BlockType; import static org.apache.tajo.engine.planner.LogicalPlanPreprocessor.PreprocessContext; @@ -260,7 +262,8 @@ private void setRawTargets(PlanContext context, Target[] targets, String[] refer Target [] rawTargets = new Target[projection.getNamedExprs().length]; for (int i = 0; i < projection.getNamedExprs().length; i++) { NamedExpr namedExpr = projection.getNamedExprs()[i]; - EvalNode evalNode = exprAnnotator.createEvalNode(plan, block, namedExpr.getExpr()); + EvalNode evalNode = exprAnnotator.createEvalNode(plan, block, namedExpr.getExpr(), + NameResolvingMode.RELS_AND_SUBEXPRS); rawTargets[i] = new Target(evalNode, referenceNames[i]); } // it's for debugging or unit testing @@ -383,7 +386,7 @@ private EvalExprNode buildPlanForNoneFromStatement(PlanContext context, Stack it = 
block.namedExprsMgr.getIteratorForUnevaluatedExprs(); it.hasNext();) { NamedExpr rawTarget = it.next(); try { - EvalNode evalNode = exprAnnotator.createEvalNode(context.plan, context.queryBlock, rawTarget.getExpr()); + EvalNode evalNode = exprAnnotator.createEvalNode(context.plan, context.queryBlock, rawTarget.getExpr(), + NameResolvingMode.SUBEXPRS_AND_RELS); if (evalNode.getType() == EvalType.WINDOW_FUNCTION) { winFuncRefs.add(rawTarget.getAlias()); winFuncs.add((WindowFunctionEval) evalNode); @@ -678,7 +683,8 @@ private LogicalNode insertGroupbyNode(PlanContext context, LogicalNode child, St try { // check if at least distinct aggregation function includeDistinctFunction |= PlannerUtil.existsDistinctAggregationFunction(rawTarget.getExpr()); - EvalNode evalNode = exprAnnotator.createEvalNode(context.plan, context.queryBlock, rawTarget.getExpr()); + EvalNode evalNode = exprAnnotator.createEvalNode(context.plan, context.queryBlock, rawTarget.getExpr(), + NameResolvingMode.SUBEXPRS_AND_RELS); if (evalNode.getType() == EvalType.AGG_FUNCTION) { aggEvalNames.add(rawTarget.getAlias()); aggEvals.add((AggregationFunctionCallEval) evalNode); @@ -712,7 +718,8 @@ public LimitNode visitLimit(PlanContext context, Stack stack, Limit limit) EvalNode firstFetNum; LogicalNode child; if (limit.getFetchFirstNum().getType() == OpType.Literal) { - firstFetNum = exprAnnotator.createEvalNode(context.plan, block, limit.getFetchFirstNum()); + firstFetNum = exprAnnotator.createEvalNode(context.plan, block, limit.getFetchFirstNum(), + NameResolvingMode.RELS_ONLY); //////////////////////////////////////////////////////// // Visit and Build Child Plan @@ -739,7 +746,8 @@ public LimitNode visitLimit(PlanContext context, Stack stack, Limit limit) firstFetNum = block.namedExprsMgr.getTarget(referName).getEvalTree(); } else { NamedExpr namedExpr = block.namedExprsMgr.getNamedExpr(referName); - firstFetNum = exprAnnotator.createEvalNode(context.plan, block, namedExpr.getExpr()); + firstFetNum = 
exprAnnotator.createEvalNode(context.plan, block, namedExpr.getExpr(), + NameResolvingMode.SUBEXPRS_AND_RELS); block.namedExprsMgr.markAsEvaluated(referName, firstFetNum); } } @@ -835,7 +843,8 @@ public LogicalNode visitHaving(PlanContext context, Stack stack, Having ex havingCondition = block.namedExprsMgr.getTarget(referName).getEvalTree(); } else { NamedExpr namedExpr = block.namedExprsMgr.getNamedExpr(referName); - havingCondition = exprAnnotator.createEvalNode(context.plan, block, namedExpr.getExpr()); + havingCondition = exprAnnotator.createEvalNode(context.plan, block, namedExpr.getExpr(), + NameResolvingMode.SUBEXPRS_AND_RELS); block.namedExprsMgr.markAsEvaluated(referName, havingCondition); } @@ -902,7 +911,8 @@ public LogicalNode visitGroupBy(PlanContext context, Stack stack, Aggregat NamedExpr namedExpr = iterator.next(); try { includeDistinctFunction |= PlannerUtil.existsDistinctAggregationFunction(namedExpr.getExpr()); - EvalNode evalNode = exprAnnotator.createEvalNode(context.plan, context.queryBlock, namedExpr.getExpr()); + EvalNode evalNode = exprAnnotator.createEvalNode(context.plan, context.queryBlock, namedExpr.getExpr(), + NameResolvingMode.SUBEXPRS_AND_RELS); if (evalNode.getType() == EvalType.AGG_FUNCTION) { block.namedExprsMgr.markAsEvaluated(namedExpr.getAlias(), evalNode); aggEvalNames.add(namedExpr.getAlias()); @@ -988,7 +998,8 @@ public SelectionNode visitFilter(PlanContext context, Stack stack, Selecti selectionNode.setOutSchema(child.getOutSchema()); // Create EvalNode for a search condition. 
- EvalNode searchCondition = exprAnnotator.createEvalNode(context.plan, block, selection.getQual()); + EvalNode searchCondition = exprAnnotator.createEvalNode(context.plan, block, selection.getQual(), + NameResolvingMode.RELS_AND_SUBEXPRS); EvalNode simplified = AlgebraicUtil.eliminateConstantExprs(searchCondition); // set selection condition selectionNode.setQual(simplified); @@ -1041,7 +1052,8 @@ public LogicalNode visitJoin(PlanContext context, Stack stack, Join join) // Create EvalNode for a search condition. EvalNode joinCondition = null; if (join.hasQual()) { - EvalNode evalNode = exprAnnotator.createEvalNode(context.plan, block, join.getQual()); + EvalNode evalNode = exprAnnotator.createEvalNode(context.plan, block, join.getQual(), + NameResolvingMode.LEGACY); joinCondition = AlgebraicUtil.eliminateConstantExprs(evalNode); } @@ -1071,7 +1083,7 @@ private List getNewlyEvaluatedExprsForJoin(LogicalPlan plan, QueryBlock for (Iterator it = block.namedExprsMgr.getIteratorForUnevaluatedExprs(); it.hasNext();) { NamedExpr namedExpr = it.next(); try { - evalNode = exprAnnotator.createEvalNode(plan, block, namedExpr.getExpr()); + evalNode = exprAnnotator.createEvalNode(plan, block, namedExpr.getExpr(), NameResolvingMode.LEGACY); if (LogicalPlanner.checkIfBeEvaluatedAtJoin(block, evalNode, joinNode, stack.peek().getType() != OpType.Join)) { block.namedExprsMgr.markAsEvaluated(namedExpr.getAlias(), evalNode); newlyEvaluatedExprs.add(namedExpr.getAlias()); @@ -1141,7 +1153,7 @@ private LogicalNode createCartesianProduct(PlanContext context, LogicalNode left for (Iterator it = block.namedExprsMgr.getIteratorForUnevaluatedExprs(); it.hasNext();) { NamedExpr namedExpr = it.next(); try { - evalNode = exprAnnotator.createEvalNode(plan, block, namedExpr.getExpr()); + evalNode = exprAnnotator.createEvalNode(plan, block, namedExpr.getExpr(), NameResolvingMode.LEGACY); if (EvalTreeUtil.findDistinctAggFunction(evalNode).size() == 0) { 
block.namedExprsMgr.markAsEvaluated(namedExpr.getAlias(), evalNode); newlyEvaluatedExprs.add(namedExpr.getAlias()); @@ -1193,7 +1205,8 @@ public ScanNode visitRelation(PlanContext context, Stack stack, Relation e for (Iterator iterator = block.namedExprsMgr.getIteratorForUnevaluatedExprs(); iterator.hasNext();) { NamedExpr rawTarget = iterator.next(); try { - EvalNode evalNode = exprAnnotator.createEvalNode(context.plan, context.queryBlock, rawTarget.getExpr()); + EvalNode evalNode = exprAnnotator.createEvalNode(context.plan, context.queryBlock, rawTarget.getExpr(), + NameResolvingMode.RELS_ONLY); if (checkIfBeEvaluatedAtRelation(block, evalNode, scanNode)) { block.namedExprsMgr.markAsEvaluated(rawTarget.getAlias(), evalNode); newlyEvaluatedExprsReferences.add(rawTarget.getAlias()); // newly added exr @@ -1208,8 +1221,9 @@ public ScanNode visitRelation(PlanContext context, Stack stack, Relation e // The fact the some expr is included in newlyEvaluatedExprsReferences means that it is already evaluated. // So, we get a raw expression and then creates a target. 
for (String reference : newlyEvaluatedExprsReferences) { - NamedExpr refrrer = block.namedExprsMgr.getNamedExpr(reference); - EvalNode evalNode = exprAnnotator.createEvalNode(context.plan, block, refrrer.getExpr()); + NamedExpr refrer = block.namedExprsMgr.getNamedExpr(reference); + EvalNode evalNode = exprAnnotator.createEvalNode(context.plan, block, refrer.getExpr(), + NameResolvingMode.RELS_ONLY); targets.add(new Target(evalNode, reference)); } @@ -1267,7 +1281,8 @@ public TableSubQueryNode visitTableSubQuery(PlanContext context, Stack sta Set newlyEvaluatedExprs = TUtil.newHashSet(); for (NamedExpr rawTarget : block.namedExprsMgr.getAllNamedExprs()) { try { - EvalNode evalNode = exprAnnotator.createEvalNode(context.plan, context.queryBlock, rawTarget.getExpr()); + EvalNode evalNode = exprAnnotator.createEvalNode(context.plan, context.queryBlock, rawTarget.getExpr(), + NameResolvingMode.RELS_ONLY); if (checkIfBeEvaluatedAtRelation(block, evalNode, subQueryNode)) { block.namedExprsMgr.markAsEvaluated(rawTarget.getAlias(), evalNode); newlyEvaluatedExprs.add(rawTarget.getAlias()); // newly added exr diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/UniformRangePartition.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/UniformRangePartition.java index edb24f7dfe..92da211fd1 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/UniformRangePartition.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/UniformRangePartition.java @@ -21,6 +21,7 @@ import com.google.common.base.Preconditions; import com.google.common.collect.Lists; import com.google.common.primitives.UnsignedInteger; +import com.google.common.primitives.UnsignedLong; import org.apache.tajo.catalog.Column; import org.apache.tajo.catalog.SortSpec; import org.apache.tajo.datum.Datum; @@ -425,8 +426,9 @@ public Tuple increment(final Tuple last, final long inc, final int baseDigit) { end.put(i, DatumFactory.createText(((char) 
(range.getStart().get(i).asChars().charAt(0) + incs[i].longValue())) + "")); } else { - UnsignedInteger lastBigInt = UnsignedInteger.valueOf(new BigInteger(last.get(i).asByteArray())); - end.put(i, DatumFactory.createText(lastBigInt.add(UnsignedInteger.valueOf(inc)).bigIntegerValue().toByteArray())); + BigInteger lastBigInt = UnsignedLong.valueOf(new BigInteger(last.get(i).asByteArray())).bigIntegerValue(); + BigInteger incBigInt = UnsignedLong.asUnsigned(inc).bigIntegerValue(); + end.put(i, DatumFactory.createText(lastBigInt.add(incBigInt).toByteArray())); } break; case DATE: diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/logical/TableSubQueryNode.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/logical/TableSubQueryNode.java index 4d0090b31e..3c808fcd78 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/logical/TableSubQueryNode.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/logical/TableSubQueryNode.java @@ -86,7 +86,6 @@ public void setSubQuery(LogicalNode node) { } else { setOutSchema(SchemaUtil.clone(this.subQuery.getOutSchema())); } - getOutSchema().setQualifier(this.tableName); } public LogicalNode getSubQuery() { diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/nameresolver/NameResolver.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/nameresolver/NameResolver.java new file mode 100644 index 0000000000..aee5d43b6f --- /dev/null +++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/nameresolver/NameResolver.java @@ -0,0 +1,291 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tajo.engine.planner.nameresolver; + +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import org.apache.tajo.algebra.ColumnReferenceExpr; +import org.apache.tajo.catalog.CatalogUtil; +import org.apache.tajo.catalog.Column; +import org.apache.tajo.catalog.Schema; +import org.apache.tajo.engine.exception.AmbiguousFieldException; +import org.apache.tajo.engine.exception.NoSuchColumnException; +import org.apache.tajo.engine.exception.VerifyException; +import org.apache.tajo.engine.planner.LogicalPlan; +import org.apache.tajo.engine.planner.PlanningException; +import org.apache.tajo.engine.planner.logical.RelationNode; +import org.apache.tajo.util.Pair; +import org.apache.tajo.util.TUtil; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +/** + * NameResolver utility + */ +public abstract class NameResolver { + + public static Map resolverMap = Maps.newHashMap(); + + static { + resolverMap.put(NameResolvingMode.RELS_ONLY, new ResolverByRels()); + resolverMap.put(NameResolvingMode.RELS_AND_SUBEXPRS, new ResolverByRelsAndSubExprs()); + resolverMap.put(NameResolvingMode.SUBEXPRS_AND_RELS, new ResolverBySubExprsAndRels()); + resolverMap.put(NameResolvingMode.LEGACY, new ResolverByLegacy()); + } + + abstract Column resolve(LogicalPlan plan, LogicalPlan.QueryBlock block, ColumnReferenceExpr columnRef) + throws PlanningException; + + /** + * Try to find the database name + * + * @param block the current block + * @param tableName The table name + * @return The found database 
name + * @throws PlanningException + */ + public static String resolveDatabase(LogicalPlan.QueryBlock block, String tableName) throws PlanningException { + List found = new ArrayList(); + for (RelationNode relation : block.getRelations()) { + // check alias name or table name + if (CatalogUtil.extractSimpleName(relation.getCanonicalName()).equals(tableName) || + CatalogUtil.extractSimpleName(relation.getTableName()).equals(tableName)) { + // obtain the database name + found.add(CatalogUtil.extractQualifier(relation.getTableName())); + } + } + + if (found.size() == 0) { + return null; + } else if (found.size() > 1) { + throw new PlanningException("Ambiguous table name \"" + tableName + "\""); + } + + return found.get(0); + } + + public static Column resolve(LogicalPlan plan, LogicalPlan.QueryBlock block, ColumnReferenceExpr column, + NameResolvingMode mode) throws PlanningException { + if (!resolverMap.containsKey(mode)) { + throw new PlanningException("Unsupported name resolving level: " + mode.name()); + } + return resolverMap.get(mode).resolve(plan, block, column); + } + + /** + * Try to find a column from all relations within a given query block. + * If a given column reference is qualified, it tries to resolve the name + * from only the relation corresponding to the qualifier. 
+ * + * @param plan The logical plan + * @param block The current query block + * @param columnRef The column reference to be found + * @return The found column + * @throws PlanningException if the qualifier of the column reference does not match any relation in the block + */ + static Column resolveFromRelsWithinBlock(LogicalPlan plan, LogicalPlan.QueryBlock block, + ColumnReferenceExpr columnRef) throws PlanningException { + String qualifier; + String canonicalName; + + if (columnRef.hasQualifier()) { + Pair normalized = normalizeQualifierAndCanonicalName(block, columnRef); + qualifier = normalized.getFirst(); + canonicalName = normalized.getSecond(); + + RelationNode relationOp = block.getRelation(qualifier); + + // If we cannot find any relation against a qualified column name + if (relationOp == null) { + throw new NoSuchColumnException(columnRef.getCanonicalName()); + } + + // Please consider a query case: + // select lineitem.l_orderkey from lineitem a order by lineitem.l_orderkey; + // + // The relation lineitem is already renamed to "a", but lineitem.l_orderkey still can be used. + // The below code makes it available. Otherwise, it cannot find any match in the relation schema. + if (block.isAlreadyRenamedTableName(CatalogUtil.extractQualifier(canonicalName))) { + canonicalName = + CatalogUtil.buildFQName(relationOp.getCanonicalName(), CatalogUtil.extractSimpleName(canonicalName)); + } + + Schema schema = relationOp.getTableSchema(); + Column column = schema.getColumn(canonicalName); + + return column; + } else { + return resolveFromAllRelsInBlock(block, columnRef); + } + } + + /** + * Try to find the column from the current node and child node. It can find subexprs generated from the optimizer.
+ * + * @param block The current query block + * @param columnRef The column reference to be found + * @return The found column + */ + static Column resolveFromCurrentAndChildNode(LogicalPlan.QueryBlock block, ColumnReferenceExpr columnRef) + throws NoSuchColumnException { + + if (block.getCurrentNode() != null && block.getCurrentNode().getInSchema() != null) { + Column found = block.getCurrentNode().getInSchema().getColumn(columnRef.getCanonicalName()); + if (found != null) { + return found; + } else if (block.getLatestNode() != null) { + found = block.getLatestNode().getOutSchema().getColumn(columnRef.getName()); + if (found != null) { + return found; + } + } + } + return null; + } + + /** + * It tries to find a full qualified column name from all relations in the current block. + * + * @param block The current query block + * @param columnRef The column reference to be found + * @return The found column + */ + static Column resolveFromAllRelsInBlock(LogicalPlan.QueryBlock block, + ColumnReferenceExpr columnRef) throws VerifyException { + List candidates = TUtil.newList(); + + for (RelationNode rel : block.getRelations()) { + Column found = rel.getTableSchema().getColumn(columnRef.getName()); + if (found != null) { + candidates.add(found); + } + } + + if (!candidates.isEmpty()) { + return ensureUniqueColumn(candidates); + } else { + return null; + } + } + + /** + * Trying to find a column from all relations in other blocks + * + * @param plan The logical plan + * @param columnRef The column reference to be found + * @return The found column + */ + static Column resolveFromAllRelsInAllBlocks(LogicalPlan plan, ColumnReferenceExpr columnRef) throws VerifyException { + + List candidates = Lists.newArrayList(); + + // from all relations of all query blocks + for (LogicalPlan.QueryBlock eachBlock : plan.getQueryBlocks()) { + + for (RelationNode rel : eachBlock.getRelations()) { + Column found = rel.getTableSchema().getColumn(columnRef.getName()); + if (found != null) { 
+ candidates.add(found); + } + } + } + + if (!candidates.isEmpty()) { + return NameResolver.ensureUniqueColumn(candidates); + } else { + return null; + } + } + + /** + * Try to find a column from the final schema of the current block. + * + * @param block The current query block + * @param columnRef The column reference to be found + * @return The found column + */ + static Column resolveAliasedName(LogicalPlan.QueryBlock block, ColumnReferenceExpr columnRef) throws VerifyException { + List candidates = Lists.newArrayList(); + + if (block.getSchema() != null) { + Column found = block.getSchema().getColumn(columnRef.getName()); + if (found != null) { + candidates.add(found); + } + } + + if (!candidates.isEmpty()) { + return NameResolver.ensureUniqueColumn(candidates); + } else { + return null; + } + } + + /** + * It returns a pair of names, which the first value is ${database}.${table} and the second value + * is a simple column name. + * + * @param block The current block + * @param columnRef The column name + * @return A pair of normalized qualifier and column name + * @throws PlanningException + */ + static Pair normalizeQualifierAndCanonicalName(LogicalPlan.QueryBlock block, + ColumnReferenceExpr columnRef) + throws PlanningException { + String qualifier; + String canonicalName; + + if (CatalogUtil.isFQTableName(columnRef.getQualifier())) { + qualifier = columnRef.getQualifier(); + canonicalName = columnRef.getCanonicalName(); + } else { + String resolvedDatabaseName = resolveDatabase(block, columnRef.getQualifier()); + if (resolvedDatabaseName == null) { + throw new NoSuchColumnException(columnRef.getQualifier()); + } + qualifier = CatalogUtil.buildFQName(resolvedDatabaseName, columnRef.getQualifier()); + canonicalName = CatalogUtil.buildFQName(qualifier, columnRef.getName()); + } + + return new Pair(qualifier, canonicalName); + } + + static Column ensureUniqueColumn(List candidates) throws VerifyException { + if (candidates.size() == 1) { + return 
candidates.get(0); + } else if (candidates.size() > 1) { // two or more matches are ambiguous + StringBuilder sb = new StringBuilder(); + boolean first = true; + for (Column column : candidates) { + if (first) { + first = false; + } else { + sb.append(", "); + } + sb.append(column); + } + throw new AmbiguousFieldException("Ambiguous Column Name: " + sb.toString()); + } else { + return null; + } + } +} diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/nameresolver/NameResolvingMode.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/nameresolver/NameResolvingMode.java new file mode 100644 index 0000000000..dcbb5f1b96 --- /dev/null +++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/nameresolver/NameResolvingMode.java @@ -0,0 +1,80 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tajo.engine.planner.nameresolver; + +/** + * + *

Motivation

+ * + * Please take a look at the following example query: + * + *
+ *   select (l_orderkey + l_orderkey) l_orderkey from lineitem where l_orderkey > 2 order by l_orderkey;
+ * 
+ * + * Although l_orderkey seems to be ambiguous, the above usages are available in commercial DBMSs. + * In order to eliminate the ambiguity, Tajo follows the behaviors of PostgreSQL. + * + *

Resolving Modes

+ * + * From the behaviors of PostgreSQL, we found that there are three kinds of name resolving modes. + * Each definition is as follows: + * + *
    + *
  • RELS_ONLY finds a column from the relations in the current block. + *
  • RELS_AND_SUBEXPRS finds a column from all the relations in the current block and + * from aliased temporal fields; a temporal field means an explicitly aliased expression. If there are duplicated + * columns in the relation and temporal fields, this mode first chooses the field in a relation.
  • + *
  • SUBEXPRS_AND_RELS is very similar to RELS_AND_SUBEXPRS. The main difference is that it + * first chooses an aliased temporal field instead of the fields in a relation.
  • + *
+ * + *

The relationship between resolving modes and operators

+ * + *
    + *
  • fields in select list and LIMIT are resolved in the RELS_ONLY mode.
  • + *
  • fields in WHERE clause are resolved in the RELS_AND_SUBEXPRS mode.
  • + *
  • fields in GROUP BY, HAVING, and ORDER BY are resolved in the SUBEXPRS_AND_RELS mode.
  • + *
+ * + *

Example

+ * + * Please revisit the aforementioned example: + * + *
+ *   select (l_orderkey + l_orderkey) l_orderkey from lineitem where l_orderkey > 2 order by l_orderkey;
+ * 
+ * + * With the above rules and the relationship between modes and operators, we can easily identify which reference + * points to which field. + *
    + *
  1. l_orderkey included in (l_orderkey + l_orderkey) points to the field + * in the relation lineitem.
  2. + *
  3. l_orderkey included in WHERE clause also points to the field in the relation + * lineitem.
  4. + *
  5. l_orderkey included in ORDER BY clause points to the temporal field + * (l_orderkey + l_orderkey).
  6. + *
+ */ +public enum NameResolvingMode { + RELS_ONLY, // finding from only relations + RELS_AND_SUBEXPRS, // finding from relations and subexprs in a place + SUBEXPRS_AND_RELS, // finding from subexprs and relations in a place + LEGACY // Finding in a legacy manner (globally) +} diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/nameresolver/ResolverByLegacy.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/nameresolver/ResolverByLegacy.java new file mode 100644 index 0000000000..396bc1b184 --- /dev/null +++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/nameresolver/ResolverByLegacy.java @@ -0,0 +1,126 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.tajo.engine.planner.nameresolver; + +import org.apache.tajo.algebra.ColumnReferenceExpr; +import org.apache.tajo.catalog.CatalogUtil; +import org.apache.tajo.catalog.Column; +import org.apache.tajo.catalog.Schema; +import org.apache.tajo.engine.exception.NoSuchColumnException; +import org.apache.tajo.engine.planner.LogicalPlan; +import org.apache.tajo.engine.planner.PlanningException; +import org.apache.tajo.engine.planner.logical.LogicalNode; +import org.apache.tajo.engine.planner.logical.NodeType; +import org.apache.tajo.engine.planner.logical.RelationNode; +import org.apache.tajo.util.Pair; +import org.apache.tajo.util.TUtil; + +import java.util.List; + +public class ResolverByLegacy extends NameResolver { + @Override + public Column resolve(LogicalPlan plan, LogicalPlan.QueryBlock block, ColumnReferenceExpr columnRef) + throws PlanningException { + + if (columnRef.hasQualifier()) { + return resolveColumnWithQualifier(plan, block, columnRef); + } else { + return resolveColumnWithoutQualifier(plan, block, columnRef); + } + } + + private static Column resolveColumnWithQualifier(LogicalPlan plan, LogicalPlan.QueryBlock block, + ColumnReferenceExpr columnRef) throws PlanningException { + final String qualifier; + String canonicalName; + final String qualifiedName; + + Pair normalized = normalizeQualifierAndCanonicalName(block, columnRef); + qualifier = normalized.getFirst(); + canonicalName = normalized.getSecond(); + qualifiedName = CatalogUtil.buildFQName(qualifier, columnRef.getName()); + + Column found = resolveFromRelsWithinBlock(plan, block, columnRef); + if (found == null) { + throw new NoSuchColumnException(columnRef.getCanonicalName()); + } + + // If code reach here, a column is found. + // But, it may be aliased from bottom logical node. + // If the column is aliased, the found name may not be used in upper node. + + // Here, we try to check if column reference is already aliased. 
+ // If so, it replaces the name with aliased name. + LogicalNode currentNode = block.getCurrentNode(); + + // The condition (currentNode.getInSchema().contains(column)) means + // the column can be used at the current node. So, we don't need to find aliase name. + Schema currentNodeSchema = null; + if (currentNode != null) { + if (currentNode instanceof RelationNode) { + currentNodeSchema = ((RelationNode) currentNode).getTableSchema(); + } else { + currentNodeSchema = currentNode.getInSchema(); + } + } + + if (currentNode != null && !currentNodeSchema.contains(found) + && currentNode.getType() != NodeType.TABLE_SUBQUERY) { + List candidates = TUtil.newList(); + if (block.getNamedExprsManager().isAliased(qualifiedName)) { + String alias = block.getNamedExprsManager().getAlias(canonicalName); + found = resolve(plan, block, new ColumnReferenceExpr(alias), NameResolvingMode.LEGACY); + if (found != null) { + candidates.add(found); + } + } + if (!candidates.isEmpty()) { + return ensureUniqueColumn(candidates); + } + } + + return found; + } + + static Column resolveColumnWithoutQualifier(LogicalPlan plan, LogicalPlan.QueryBlock block, + ColumnReferenceExpr columnRef)throws PlanningException { + + Column found = resolveFromAllRelsInBlock(block, columnRef); + if (found != null) { + return found; + } + + found = resolveAliasedName(block, columnRef); + if (found != null) { + return found; + } + + found = resolveFromCurrentAndChildNode(block, columnRef); + if (found != null) { + return found; + } + + found = resolveFromAllRelsInAllBlocks(plan, columnRef); + if (found != null) { + return found; + } + + throw new NoSuchColumnException("ERROR: no such a column name "+ columnRef.getCanonicalName()); + } +} diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/nameresolver/ResolverByRels.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/nameresolver/ResolverByRels.java new file mode 100644 index 0000000000..9713e5251a --- /dev/null +++ 
b/tajo-core/src/main/java/org/apache/tajo/engine/planner/nameresolver/ResolverByRels.java @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tajo.engine.planner.nameresolver; + +import org.apache.tajo.algebra.ColumnReferenceExpr; +import org.apache.tajo.catalog.Column; +import org.apache.tajo.engine.exception.NoSuchColumnException; +import org.apache.tajo.engine.planner.LogicalPlan; +import org.apache.tajo.engine.planner.PlanningException; + +public class ResolverByRels extends NameResolver { + @Override + public Column resolve(LogicalPlan plan, LogicalPlan.QueryBlock block, ColumnReferenceExpr columnRef) + throws PlanningException { + + Column column = resolveFromRelsWithinBlock(plan, block, columnRef); + if (column == null) { + throw new NoSuchColumnException(columnRef.getCanonicalName()); + } + return column; + } +} diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/nameresolver/ResolverByRelsAndSubExprs.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/nameresolver/ResolverByRelsAndSubExprs.java new file mode 100644 index 0000000000..7ca3c535d3 --- /dev/null +++ 
b/tajo-core/src/main/java/org/apache/tajo/engine/planner/nameresolver/ResolverByRelsAndSubExprs.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tajo.engine.planner.nameresolver; + +import org.apache.tajo.algebra.ColumnReferenceExpr; +import org.apache.tajo.catalog.Column; +import org.apache.tajo.engine.exception.NoSuchColumnException; +import org.apache.tajo.engine.planner.LogicalPlan; +import org.apache.tajo.engine.planner.PlanningException; + +public class ResolverByRelsAndSubExprs extends NameResolver { + @Override + public Column resolve(LogicalPlan plan, LogicalPlan.QueryBlock block, ColumnReferenceExpr columnRef) + throws PlanningException { + + Column column = resolveFromRelsWithinBlock(plan, block, columnRef); + if (column == null) { + column = resolveFromCurrentAndChildNode(block, columnRef); + } + + if (column == null) { + throw new NoSuchColumnException(columnRef.getCanonicalName()); + } + return column; + } +} diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/nameresolver/ResolverBySubExprsAndRels.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/nameresolver/ResolverBySubExprsAndRels.java new file mode 100644 index 
0000000000..7337ecee5e --- /dev/null +++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/nameresolver/ResolverBySubExprsAndRels.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tajo.engine.planner.nameresolver; + +import org.apache.tajo.algebra.ColumnReferenceExpr; +import org.apache.tajo.catalog.Column; +import org.apache.tajo.engine.exception.NoSuchColumnException; +import org.apache.tajo.engine.planner.LogicalPlan; +import org.apache.tajo.engine.planner.PlanningException; + +public class ResolverBySubExprsAndRels extends NameResolver { + @Override + public Column resolve(LogicalPlan plan, LogicalPlan.QueryBlock block, ColumnReferenceExpr columnRef) + throws PlanningException { + + Column column = resolveFromCurrentAndChildNode(block, columnRef); + if (column == null) { + column = resolveFromRelsWithinBlock(plan, block, columnRef); + } + + if (column == null) { + throw new NoSuchColumnException(columnRef.getCanonicalName()); + } + return column; + } +} diff --git a/tajo-core/src/main/java/org/apache/tajo/master/querymaster/QueryUnit.java b/tajo-core/src/main/java/org/apache/tajo/master/querymaster/QueryUnit.java index 806c0f18d0..8c953bdef8 100644 --- 
a/tajo-core/src/main/java/org/apache/tajo/master/querymaster/QueryUnit.java +++ b/tajo-core/src/main/java/org/apache/tajo/master/querymaster/QueryUnit.java @@ -626,7 +626,7 @@ public List getIntermediateData() { return this.intermediateData; } - public static class PullHost { + public static class PullHost implements Cloneable { String host; int port; public PullHost(String pullServerAddr, int pullServerPort){ @@ -659,6 +659,14 @@ public boolean equals(Object obj) { return false; } + + @Override + public PullHost clone() throws CloneNotSupportedException { + PullHost newPullHost = (PullHost) super.clone(); + newPullHost.host = host; + newPullHost.port = port; + return newPullHost; + } } public static class IntermediateEntry { diff --git a/tajo-core/src/main/java/org/apache/tajo/master/querymaster/Repartitioner.java b/tajo-core/src/main/java/org/apache/tajo/master/querymaster/Repartitioner.java index 973a4df0fb..b31b6a511b 100644 --- a/tajo-core/src/main/java/org/apache/tajo/master/querymaster/Repartitioner.java +++ b/tajo-core/src/main/java/org/apache/tajo/master/querymaster/Repartitioner.java @@ -568,7 +568,6 @@ public static void scheduleRangeShuffledFetches(TaskSchedulerContext schedulerCo // calculate the number of maximum query ranges TableStats totalStat = computeChildBlocksStats(subQuery.getContext(), masterPlan, subQuery.getId()); - System.out.println(totalStat); // If there is an empty table in inner join, it should return zero rows. if (totalStat.getNumBytes() == 0 && totalStat.getColumnStats().size() == 0 ) { @@ -638,7 +637,12 @@ public static void scheduleRangeShuffledFetches(TaskSchedulerContext schedulerCo for (FetchImpl fetch: fetches) { String rangeParam = TupleUtil.rangeToQuery(ranges[i], ascendingFirstKey ? 
i == (ranges.length - 1) : i == 0, encoder); - FetchImpl copy = new FetchImpl(fetch.getProto()); + FetchImpl copy = null; + try { + copy = fetch.clone(); + } catch (CloneNotSupportedException e) { + throw new RuntimeException(e); + } copy.setRangeParams(rangeParam); fetchSet.add(copy); } diff --git a/tajo-core/src/main/java/org/apache/tajo/worker/FetchImpl.java b/tajo-core/src/main/java/org/apache/tajo/worker/FetchImpl.java index 9d1f4286e7..869c1066d0 100644 --- a/tajo-core/src/main/java/org/apache/tajo/worker/FetchImpl.java +++ b/tajo-core/src/main/java/org/apache/tajo/worker/FetchImpl.java @@ -20,6 +20,7 @@ import com.google.common.base.Objects; import com.google.common.base.Preconditions; +import com.google.common.collect.Lists; import org.apache.tajo.ExecutionBlockId; import org.apache.tajo.common.ProtoObject; import org.apache.tajo.ipc.TajoWorkerProtocol; @@ -33,7 +34,7 @@ /** * FetchImpl information to indicate the locations of intermediate data. */ -public class FetchImpl implements ProtoObject { +public class FetchImpl implements ProtoObject, Cloneable { private TajoWorkerProtocol.FetchProto.Builder builder = null; private QueryUnit.PullHost host; // The pull server host information @@ -110,6 +111,7 @@ public TajoWorkerProtocol.FetchProto getProto() { builder.setPartitionId(partitionId); builder.setHasNext(hasNext); builder.setName(name); + if (rangeParams != null && !rangeParams.isEmpty()) { builder.setRangeParams(rangeParams); } @@ -198,4 +200,24 @@ public List getTaskIds() { public List getAttemptIds() { return attemptIds; } + + public FetchImpl clone() throws CloneNotSupportedException { + FetchImpl newFetchImpl = (FetchImpl) super.clone(); + + newFetchImpl.builder = TajoWorkerProtocol.FetchProto.newBuilder(); + newFetchImpl.host = host.clone(); + newFetchImpl.type = type; + newFetchImpl.executionBlockId = executionBlockId; + newFetchImpl.partitionId = partitionId; + newFetchImpl.name = name; + newFetchImpl.rangeParams = rangeParams; + 
newFetchImpl.hasNext = hasNext; + if (taskIds != null) { + newFetchImpl.taskIds = Lists.newArrayList(taskIds); + } + if (attemptIds != null) { + newFetchImpl.attemptIds = Lists.newArrayList(attemptIds); + } + return newFetchImpl; + } } diff --git a/tajo-core/src/test/java/org/apache/tajo/engine/eval/TestEvalTreeUtil.java b/tajo-core/src/test/java/org/apache/tajo/engine/eval/TestEvalTreeUtil.java index 92770ec09f..7bb619d815 100644 --- a/tajo-core/src/test/java/org/apache/tajo/engine/eval/TestEvalTreeUtil.java +++ b/tajo-core/src/test/java/org/apache/tajo/engine/eval/TestEvalTreeUtil.java @@ -39,6 +39,7 @@ import org.apache.tajo.engine.planner.Target; import org.apache.tajo.engine.planner.logical.GroupbyNode; import org.apache.tajo.engine.planner.logical.NodeType; +import org.apache.tajo.engine.planner.nameresolver.NameResolvingMode; import org.apache.tajo.exception.InternalException; import org.apache.tajo.master.TajoMaster; import org.apache.tajo.master.session.Session; @@ -154,7 +155,8 @@ public static EvalNode getRootSelection(String query) throws PlanningException { } Selection selection = plan.getRootBlock().getSingletonExpr(OpType.Filter); - return planner.getExprAnnotator().createEvalNode(plan, plan.getRootBlock(), selection.getQual()); + return planner.getExprAnnotator().createEvalNode(plan, plan.getRootBlock(), selection.getQual(), + NameResolvingMode.RELS_AND_SUBEXPRS); } @Test diff --git a/tajo-core/src/test/java/org/apache/tajo/engine/query/TestCaseByCases.java b/tajo-core/src/test/java/org/apache/tajo/engine/query/TestCaseByCases.java index 73df4e18a3..459a4c167e 100644 --- a/tajo-core/src/test/java/org/apache/tajo/engine/query/TestCaseByCases.java +++ b/tajo-core/src/test/java/org/apache/tajo/engine/query/TestCaseByCases.java @@ -133,4 +133,11 @@ public final void testTAJO880_3() throws Exception { assertEquals(expected, resultSetToString(res)); cleanupQuery(res); } + + @Test + public final void testTAJO917Case1() throws Exception { + ResultSet res = 
executeQuery(); + assertResultSet(res); + cleanupQuery(res); + } } diff --git a/tajo-core/src/test/java/org/apache/tajo/engine/query/TestSelectQuery.java b/tajo-core/src/test/java/org/apache/tajo/engine/query/TestSelectQuery.java index 639c3efd99..8898067fe4 100644 --- a/tajo-core/src/test/java/org/apache/tajo/engine/query/TestSelectQuery.java +++ b/tajo-core/src/test/java/org/apache/tajo/engine/query/TestSelectQuery.java @@ -48,6 +48,14 @@ public TestSelectQuery() { super(TajoConstants.DEFAULT_DATABASE_NAME); } + @Test + public final void testNonQualifiedNames() throws Exception { + // select l_orderkey, l_partkey from lineitem; + ResultSet res = executeQuery(); + assertResultSet(res); + cleanupQuery(res); + } + @Test public final void testNonFromSelect1() throws Exception { // select upper('abc'); @@ -112,6 +120,25 @@ public final void testSelectColumnAlias1() throws Exception { cleanupQuery(res); } + @Test + public final void testSelectColumnAliasExistingInRelation1() throws Exception { + // We intend that 'l_orderkey' in where clause points to "default.lineitem.l_orderkey" + // select (l_orderkey + l_orderkey) l_orderkey from lineitem where l_orderkey > 2; + ResultSet res = executeQuery(); + assertResultSet(res); + cleanupQuery(res); + } + + @Test + public final void testSelectColumnAliasExistingInRelation2() throws Exception { + // We intend that 'l_orderkey' in orderby clause points to (-l_orderkey). 
+ // select (-l_orderkey) as l_orderkey from lineitem order by l_orderkey; + ResultSet res = executeQuery(); + assertResultSet(res); + cleanupQuery(res); + } + + @Test public final void testSelectSameConstantsWithDifferentAliases() throws Exception { // select l_orderkey, '20130819' as date1, '20130819' as date2 from lineitem where l_orderkey > -1; diff --git a/tajo-core/src/test/resources/queries/TestCaseByCases/testTAJO917Case1.sql b/tajo-core/src/test/resources/queries/TestCaseByCases/testTAJO917Case1.sql new file mode 100644 index 0000000000..5b3039ca07 --- /dev/null +++ b/tajo-core/src/test/resources/queries/TestCaseByCases/testTAJO917Case1.sql @@ -0,0 +1,13 @@ +select + temp.r_regionkey as r_regionkey +from + ( + select + region.r_regionkey as r_regionkey + from + region + ) temp +join + region b +on + temp.r_regionkey = b.r_regionkey; \ No newline at end of file diff --git a/tajo-core/src/test/resources/queries/TestSelectQuery/testNonQualifiedNames.sql b/tajo-core/src/test/resources/queries/TestSelectQuery/testNonQualifiedNames.sql new file mode 100644 index 0000000000..0c176b72ad --- /dev/null +++ b/tajo-core/src/test/resources/queries/TestSelectQuery/testNonQualifiedNames.sql @@ -0,0 +1 @@ +select l_orderkey, l_partkey from lineitem; \ No newline at end of file diff --git a/tajo-core/src/test/resources/queries/TestSelectQuery/testSelectColumnAliasExistingInRelation1.sql b/tajo-core/src/test/resources/queries/TestSelectQuery/testSelectColumnAliasExistingInRelation1.sql new file mode 100644 index 0000000000..91170e3468 --- /dev/null +++ b/tajo-core/src/test/resources/queries/TestSelectQuery/testSelectColumnAliasExistingInRelation1.sql @@ -0,0 +1 @@ +select (l_orderkey + l_orderkey) l_orderkey from lineitem where l_orderkey > 2; \ No newline at end of file diff --git a/tajo-core/src/test/resources/queries/TestSelectQuery/testSelectColumnAliasExistingInRelation2.sql 
b/tajo-core/src/test/resources/queries/TestSelectQuery/testSelectColumnAliasExistingInRelation2.sql new file mode 100644 index 0000000000..89f63fd772 --- /dev/null +++ b/tajo-core/src/test/resources/queries/TestSelectQuery/testSelectColumnAliasExistingInRelation2.sql @@ -0,0 +1 @@ +select (-l_orderkey) as l_orderkey from lineitem order by l_orderkey; \ No newline at end of file diff --git a/tajo-core/src/test/resources/results/TestCaseByCases/testTAJO917Case1.result b/tajo-core/src/test/resources/results/TestCaseByCases/testTAJO917Case1.result new file mode 100644 index 0000000000..5dbe646243 --- /dev/null +++ b/tajo-core/src/test/resources/results/TestCaseByCases/testTAJO917Case1.result @@ -0,0 +1,7 @@ +r_regionkey +------------------------------- +0 +1 +2 +3 +4 \ No newline at end of file diff --git a/tajo-core/src/test/resources/results/TestSelectQuery/testNonQualifiedNames.result b/tajo-core/src/test/resources/results/TestSelectQuery/testNonQualifiedNames.result new file mode 100644 index 0000000000..13785365c7 --- /dev/null +++ b/tajo-core/src/test/resources/results/TestSelectQuery/testNonQualifiedNames.result @@ -0,0 +1,7 @@ +l_orderkey,l_partkey +------------------------------- +1,1 +1,1 +2,2 +3,2 +3,3 \ No newline at end of file diff --git a/tajo-core/src/test/resources/results/TestSelectQuery/testSelectColumnAliasExistingInRelation1.result b/tajo-core/src/test/resources/results/TestSelectQuery/testSelectColumnAliasExistingInRelation1.result new file mode 100644 index 0000000000..55e2b42174 --- /dev/null +++ b/tajo-core/src/test/resources/results/TestSelectQuery/testSelectColumnAliasExistingInRelation1.result @@ -0,0 +1,4 @@ +l_orderkey +------------------------------- +6 +6 \ No newline at end of file diff --git a/tajo-core/src/test/resources/results/TestSelectQuery/testSelectColumnAliasExistingInRelation2.result b/tajo-core/src/test/resources/results/TestSelectQuery/testSelectColumnAliasExistingInRelation2.result new file mode 100644 index 
0000000000..f0cf700625 --- /dev/null +++ b/tajo-core/src/test/resources/results/TestSelectQuery/testSelectColumnAliasExistingInRelation2.result @@ -0,0 +1,7 @@ +l_orderkey +------------------------------- +-3 +-3 +-2 +-1 +-1 \ No newline at end of file diff --git a/tajo-storage/src/main/java/org/apache/tajo/storage/RowStoreUtil.java b/tajo-storage/src/main/java/org/apache/tajo/storage/RowStoreUtil.java index 5140a6374b..dc453a946d 100644 --- a/tajo-storage/src/main/java/org/apache/tajo/storage/RowStoreUtil.java +++ b/tajo-storage/src/main/java/org/apache/tajo/storage/RowStoreUtil.java @@ -174,7 +174,7 @@ private RowStoreEncoder(Schema schema) { } public byte [] toBytes(Tuple tuple) { nullFlags.clear(); - int size = 4096; // 4kb + int size = estimateTupleDataSize(tuple); ByteBuffer bb = ByteBuffer.allocate(size + headerSize); bb.position(headerSize); Column col; @@ -236,6 +236,44 @@ private RowStoreEncoder(Schema schema) { return buf; } + // Note that, NULL values are treated separately + private int estimateTupleDataSize(Tuple tuple) { + int size = 0; + Column col; + + for (int i = 0; i < schema.size(); i++) { + if (tuple.isNull(i)) { + continue; + } + + col = schema.getColumn(i); + switch (col.getDataType().getType()) { + case BOOLEAN: + case BIT: + case CHAR: size += 1; break; + case INT2: size += 2; break; + case DATE: + case INT4: + case FLOAT4: size += 4; break; + case TIME: + case TIMESTAMP: + case INT8: + case FLOAT8: size += 8; break; + case INTERVAL: size += 12; break; + case TEXT: + case BLOB: size += (4 + tuple.get(i).asByteArray().length); break; + case INET4: + case INET6: size += tuple.get(i).asByteArray().length; break; + default: + size += 4; + } + } + + size += 100; // optimistic reservation + + return size; + } + public Schema getSchema() { return schema; } From 78e8f17c2cd7b27a576a3e9a095fa3e3e3d63c61 Mon Sep 17 00:00:00 2001 From: Hyunsik Choi Date: Tue, 22 Jul 2014 19:19:16 +0900 Subject: [PATCH 20/21] Changing the bytes normalization --- 
.../engine/planner/UniformRangePartition.java | 25 ++++++-- .../master/querymaster/Repartitioner.java | 4 +- .../planner/TestUniformRangePartition.java | 60 ++++++++++++++----- 3 files changed, 66 insertions(+), 23 deletions(-) diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/UniformRangePartition.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/UniformRangePartition.java index 92da211fd1..1a4590660a 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/UniformRangePartition.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/UniformRangePartition.java @@ -24,6 +24,7 @@ import com.google.common.primitives.UnsignedLong; import org.apache.tajo.catalog.Column; import org.apache.tajo.catalog.SortSpec; +import org.apache.tajo.common.TajoDataTypes; import org.apache.tajo.datum.Datum; import org.apache.tajo.datum.DatumFactory; import org.apache.tajo.engine.exception.RangeOverflowException; @@ -51,9 +52,21 @@ public class UniformRangePartition extends RangePartitionAlgorithm { * @param sortSpecs The description of sort keys * @param inclusive true if the end of the range is inclusive */ - public UniformRangePartition(TupleRange totalRange, SortSpec[] sortSpecs, boolean inclusive) { + public UniformRangePartition(final TupleRange totalRange, final SortSpec[] sortSpecs, boolean inclusive) { super(sortSpecs, totalRange, inclusive); colCards = new BigInteger[sortSpecs.length]; + + int [] maxLens = new int[sortSpecs.length]; + for (int i = 0; i < sortSpecs.length; i++) { + maxLens[i] = Integer.MIN_VALUE; + } + + for (int i = 0; i < sortSpecs.length; i++) { + if (sortSpecs[i].getSortKey().getDataType().getType() == TajoDataTypes.Type.TEXT) { + maxLens[i] = Math.max(maxLens[i], totalRange.getStart().get(i).asByteArray().length); + } + } + for (int i = 0; i < sortSpecs.length; i++) { colCards[i] = computeCardinality(sortSpecs[i].getSortKey().getDataType(), totalRange.getStart().get(i), totalRange.getEnd().get(i), 
inclusive, sortSpecs[i].isAscending()); @@ -108,7 +121,7 @@ public TupleRange[] partition(int partNum) { if (reminder.compareTo(term) <= 0) { // final one is inclusive ranges.add(new TupleRange(sortSpecs, last, range.getEnd())); } else { - Tuple next = increment(last, term.longValue(), variableId); + Tuple next = increment(last, term, variableId); ranges.add(new TupleRange(sortSpecs, last, next)); } last = ranges.get(ranges.size() - 1).getEnd(); @@ -309,14 +322,14 @@ public long incrementAndGetReminder(int colId, Datum last, long inc) { /** * * @param last - * @param inc + * @param interval * @return */ - public Tuple increment(final Tuple last, final long inc, final int baseDigit) { + public Tuple increment(final Tuple last, BigInteger interval, final int baseDigit) { BigInteger [] incs = new BigInteger[last.size()]; boolean [] overflowFlag = new boolean[last.size()]; BigInteger [] result; - BigInteger value = BigInteger.valueOf(inc); + BigInteger value = interval; BigInteger [] reverseCardsForDigit = new BigInteger[baseDigit + 1]; for (int i = baseDigit; i >= 0; i--) { @@ -427,7 +440,7 @@ public Tuple increment(final Tuple last, final long inc, final int baseDigit) { + incs[i].longValue())) + "")); } else { BigInteger lastBigInt = UnsignedLong.valueOf(new BigInteger(last.get(i).asByteArray())).bigIntegerValue(); - BigInteger incBigInt = UnsignedLong.asUnsigned(inc).bigIntegerValue(); + BigInteger incBigInt = interval; end.put(i, DatumFactory.createText(lastBigInt.add(incBigInt).toByteArray())); } break; diff --git a/tajo-core/src/main/java/org/apache/tajo/master/querymaster/Repartitioner.java b/tajo-core/src/main/java/org/apache/tajo/master/querymaster/Repartitioner.java index b31b6a511b..9acd6f405f 100644 --- a/tajo-core/src/main/java/org/apache/tajo/master/querymaster/Repartitioner.java +++ b/tajo-core/src/main/java/org/apache/tajo/master/querymaster/Repartitioner.java @@ -588,9 +588,7 @@ public static void scheduleRangeShuffledFetches(TaskSchedulerContext 
schedulerCo determinedTaskNum = maxNum; } - // for LOG - TupleRange mergedRangeForPrint = TupleUtil.columnStatToRange(sortSpecs, sortSchema, totalStat.getColumnStats(), true); - LOG.info(subQuery.getId() + ", Try to divide " + mergedRangeForPrint + " into " + determinedTaskNum + + LOG.info(subQuery.getId() + ", Try to divide " + mergedRange + " into " + determinedTaskNum + " sub ranges (total units: " + determinedTaskNum + ")"); TupleRange [] ranges = partitioner.partition(determinedTaskNum); if (ranges == null || ranges.length == 0) { diff --git a/tajo-core/src/test/java/org/apache/tajo/engine/planner/TestUniformRangePartition.java b/tajo-core/src/test/java/org/apache/tajo/engine/planner/TestUniformRangePartition.java index b8b3f0e3f5..3199c57f4f 100644 --- a/tajo-core/src/test/java/org/apache/tajo/engine/planner/TestUniformRangePartition.java +++ b/tajo-core/src/test/java/org/apache/tajo/engine/planner/TestUniformRangePartition.java @@ -73,11 +73,11 @@ public void testIncrement1() { result[10] = "DB"; result[11] = "DC"; - Tuple end = partitioner.increment(s, 1, 1); + Tuple end = partitioner.increment(s, BigInteger.valueOf(1), 1); assertEquals("A", end.get(0).asChars()); assertEquals("B", end.get(1).asChars()); for (int i = 2; i < 11; i++ ) { - end = partitioner.increment(end, 1, 1); + end = partitioner.increment(end, BigInteger.valueOf(1), 1); assertEquals(result[i].charAt(0), end.get(0).asChars().charAt(0)); assertEquals(result[i].charAt(1), end.get(1).asChars().charAt(0)); } @@ -120,10 +120,10 @@ public void testIncrement2() { result[10] = "DB"; result[11] = "DC"; - Tuple end = partitioner.increment(s, 6, 1); + Tuple end = partitioner.increment(s, BigInteger.valueOf(6), 1); assertEquals("C", end.get(0).asChars()); assertEquals("A", end.get(1).asChars()); - end = partitioner.increment(end, 5, 1); + end = partitioner.increment(end, BigInteger.valueOf(5), 1); assertEquals("D", end.get(0).asChars()); assertEquals("C", end.get(1).asChars()); } @@ -154,11 +154,11 @@ 
public void testIncrement3() { UniformRangePartition partitioner = new UniformRangePartition(expected, sortSpecs); assertEquals(24, partitioner.getTotalCardinality().intValue()); - Tuple overflowBefore = partitioner.increment(s, 5, 2); + Tuple overflowBefore = partitioner.increment(s, BigInteger.valueOf(5), 2); assertEquals("A", overflowBefore.get(0).asChars()); assertEquals("B", overflowBefore.get(1).asChars()); assertEquals("C", overflowBefore.get(2).asChars()); - Tuple overflowed = partitioner.increment(overflowBefore, 1, 2); + Tuple overflowed = partitioner.increment(overflowBefore, BigInteger.valueOf(1), 2); assertEquals("B", overflowed.get(0).asChars()); assertEquals("A", overflowed.get(1).asChars()); assertEquals("A", overflowed.get(2).asChars()); @@ -184,10 +184,10 @@ public void testIncrement4() { UniformRangePartition partitioner = new UniformRangePartition(expected, sortSpecs); assertEquals(200, partitioner.getTotalCardinality().longValue()); - Tuple range2 = partitioner.increment(s, 100, 1); + Tuple range2 = partitioner.increment(s, BigInteger.valueOf(100), 1); assertEquals(15, range2.get(0).asInt4()); assertEquals(20, range2.get(1).asInt4()); - Tuple range3 = partitioner.increment(range2, 99, 1); + Tuple range3 = partitioner.increment(range2, BigInteger.valueOf(99), 1); assertEquals(19, range3.get(0).asInt4()); assertEquals(39, range3.get(1).asInt4()); } @@ -214,11 +214,11 @@ public void testIncrement4() { UniformRangePartition partitioner = new UniformRangePartition(expected, sortSpecs); assertEquals(24, partitioner.getTotalCardinality().longValue()); - Tuple beforeOverflow = partitioner.increment(s, 5, 2); + Tuple beforeOverflow = partitioner.increment(s, BigInteger.valueOf(5), 2); assertEquals(1, beforeOverflow.get(0).asInt8()); assertEquals(2, beforeOverflow.get(1).asInt8()); assertEquals(3, beforeOverflow.get(2).asInt8()); - Tuple overflow = partitioner.increment(beforeOverflow, 1, 2); + Tuple overflow = partitioner.increment(beforeOverflow, 
BigInteger.valueOf(1), 2); assertEquals(2, overflow.get(0).asInt8()); assertEquals(1, overflow.get(1).asInt8()); assertEquals(1, overflow.get(2).asInt8()); @@ -247,11 +247,11 @@ public void testIncrement6() { UniformRangePartition partitioner = new UniformRangePartition(expected, sortSpecs); assertEquals(24, partitioner.getTotalCardinality().longValue()); - Tuple beforeOverflow = partitioner.increment(s, 5, 2); + Tuple beforeOverflow = partitioner.increment(s, BigInteger.valueOf(5), 2); assertTrue(1.1d == beforeOverflow.get(0).asFloat8()); assertTrue(2.1d == beforeOverflow.get(1).asFloat8()); assertTrue(3.1d == beforeOverflow.get(2).asFloat8()); - Tuple overflow = partitioner.increment(beforeOverflow, 1, 2); + Tuple overflow = partitioner.increment(beforeOverflow, BigInteger.valueOf(1), 2); assertTrue(2.1d == overflow.get(0).asFloat8()); assertTrue(1.1d == overflow.get(1).asFloat8()); assertTrue(1.1d == overflow.get(2).asFloat8()); @@ -280,11 +280,11 @@ public void testIncrement7() { UniformRangePartition partitioner = new UniformRangePartition(expected, sortSpecs); assertEquals(24, partitioner.getTotalCardinality().longValue()); - Tuple beforeOverflow = partitioner.increment(s, 5, 2); + Tuple beforeOverflow = partitioner.increment(s, BigInteger.valueOf(5), 2); assertTrue("127.0.1.1".equals(beforeOverflow.get(0).asChars())); assertTrue("127.0.0.2".equals(beforeOverflow.get(1).asChars())); assertTrue("128.0.0.255".equals(beforeOverflow.get(2).asChars())); - Tuple overflow = partitioner.increment(beforeOverflow, 1, 2); + Tuple overflow = partitioner.increment(beforeOverflow, BigInteger.valueOf(1), 2); assertTrue("127.0.1.2".equals(overflow.get(0).asChars())); assertTrue("127.0.0.1".equals(overflow.get(1).asChars())); assertTrue("128.0.0.253".equals(overflow.get(2).asChars())); @@ -394,6 +394,38 @@ public void testPartitionForMultipleChars() { assertTrue(ranges[47].getEnd().equals(e)); } + @Test + public void testPartitionForMultipleChars2() { + Schema schema = new 
Schema() + .addColumn("KEY1", Type.TEXT); + + SortSpec [] sortSpecs = PlannerUtil.schemaToSortSpecs(schema); + + Tuple s = new VTuple(1); + s.put(0, DatumFactory.createText("A1")); + Tuple e = new VTuple(1); + e.put(0, DatumFactory.createText("A999975")); + + final int partNum = 2; + + TupleRange expected = new TupleRange(sortSpecs, s, e); + RangePartitionAlgorithm partitioner = + new UniformRangePartition(expected, sortSpecs, true); + TupleRange [] ranges = partitioner.partition(partNum); + + TupleRange prev = null; + for (TupleRange r : ranges) { + if (prev == null) { + prev = r; + } else { + assertTrue(prev.compareTo(r) < 0); + } + } + assertEquals(partNum, ranges.length); + assertTrue(ranges[0].getStart().equals(s)); + assertTrue(ranges[partNum - 1].getEnd().equals(e)); + } + @Test public void testPartitionForMultipleCharsWithSameFirstChar() { Schema schema = new Schema() From 9ade82fdd36224e60c10b01702f183ae993eb60e Mon Sep 17 00:00:00 2001 From: Hyunsik Choi Date: Wed, 23 Jul 2014 19:54:27 +0900 Subject: [PATCH 21/21] Completed uniform splitter of multiple character range. 
--- .../java/org/apache/tajo/util/BytesUtils.java | 4 + .../exception/RangeOverflowException.java | 5 +- .../planner/RangePartitionAlgorithm.java | 19 +- .../engine/planner/UniformRangePartition.java | 166 +++++++++++------- .../planner/TestUniformRangePartition.java | 35 +++- 5 files changed, 143 insertions(+), 86 deletions(-) diff --git a/tajo-common/src/main/java/org/apache/tajo/util/BytesUtils.java b/tajo-common/src/main/java/org/apache/tajo/util/BytesUtils.java index 61d12c2a1b..9dfd6d48d2 100644 --- a/tajo-common/src/main/java/org/apache/tajo/util/BytesUtils.java +++ b/tajo-common/src/main/java/org/apache/tajo/util/BytesUtils.java @@ -203,4 +203,8 @@ public static byte[][] padBytes(byte []...bytes) { return padded; } + + public static byte [] trimBytes(byte [] bytes) { + return new String(bytes).trim().getBytes(); + } } diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/exception/RangeOverflowException.java b/tajo-core/src/main/java/org/apache/tajo/engine/exception/RangeOverflowException.java index 409d6ed1ca..013c9c27e2 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/exception/RangeOverflowException.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/exception/RangeOverflowException.java @@ -22,7 +22,8 @@ import org.apache.tajo.storage.TupleRange; public class RangeOverflowException extends RuntimeException { - public RangeOverflowException(TupleRange range, Tuple overflowValue, long inc) { - super("Overflow Error: tried to increase " + inc + " to " + overflowValue + ", but the range " + range); + public RangeOverflowException(TupleRange range, Tuple overflowValue, long inc, boolean ascending) { + super("Overflow Error: tried to " + (ascending ? 
"increase " : "decrease ") + inc + " to " + overflowValue + + ", but the range " + range); } } diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/RangePartitionAlgorithm.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/RangePartitionAlgorithm.java index 5c889d9eab..db53cd771f 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/RangePartitionAlgorithm.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/RangePartitionAlgorithm.java @@ -18,23 +18,18 @@ package org.apache.tajo.engine.planner; -import org.apache.tajo.catalog.Column; import org.apache.tajo.catalog.SortSpec; import org.apache.tajo.common.TajoDataTypes.DataType; import org.apache.tajo.datum.Datum; -import org.apache.tajo.datum.NullDatum; import org.apache.tajo.storage.Tuple; import org.apache.tajo.storage.TupleRange; import org.apache.tajo.util.Bytes; -import org.apache.tajo.util.BytesUtils; -import org.apache.tajo.util.Pair; -import java.math.BigDecimal; import java.math.BigInteger; public abstract class RangePartitionAlgorithm { protected SortSpec [] sortSpecs; - protected TupleRange range; + protected TupleRange mergedRange; protected final BigInteger totalCard; /** true if the end of the range is inclusive. Otherwise, it should be false. 
*/ protected final boolean inclusive; @@ -47,7 +42,7 @@ public abstract class RangePartitionAlgorithm { */ public RangePartitionAlgorithm(SortSpec [] sortSpecs, TupleRange totalRange, boolean inclusive) { this.sortSpecs = sortSpecs; - this.range = totalRange; + this.mergedRange = totalRange; this.inclusive = inclusive; this.totalCard = computeCardinalityForAllColumns(sortSpecs, totalRange, inclusive); } @@ -120,8 +115,6 @@ public static BigInteger computeCardinality(DataType dataType, Datum start, Datu } break; case TEXT: { - byte [] aPadded; - byte [] bPadded; byte [] a; byte [] b; if (isAscending) { @@ -132,13 +125,9 @@ public static BigInteger computeCardinality(DataType dataType, Datum start, Datu a = end.asByteArray(); } - byte [][] padded = BytesUtils.padBytes(a, b); - aPadded = padded[0]; - bPadded = padded[1]; - byte [] prependHeader = {1, 0}; - final BigInteger startBI = new BigInteger(Bytes.add(prependHeader, aPadded)); - final BigInteger stopBI = new BigInteger(Bytes.add(prependHeader, bPadded)); + final BigInteger startBI = new BigInteger(Bytes.add(prependHeader, a)); + final BigInteger stopBI = new BigInteger(Bytes.add(prependHeader, b)); BigInteger diffBI = stopBI.subtract(startBI); columnCard = diffBI; break; diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/UniformRangePartition.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/UniformRangePartition.java index 1a4590660a..9e82603446 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/UniformRangePartition.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/UniformRangePartition.java @@ -20,7 +20,6 @@ import com.google.common.base.Preconditions; import com.google.common.collect.Lists; -import com.google.common.primitives.UnsignedInteger; import com.google.common.primitives.UnsignedLong; import org.apache.tajo.catalog.Column; import org.apache.tajo.catalog.SortSpec; @@ -33,7 +32,6 @@ import org.apache.tajo.storage.VTuple; import 
org.apache.tajo.util.Bytes; import org.apache.tajo.util.BytesUtils; -import org.apache.tajo.util.Pair; import java.math.BigDecimal; import java.math.BigInteger; @@ -56,16 +54,7 @@ public UniformRangePartition(final TupleRange totalRange, final SortSpec[] sortS super(sortSpecs, totalRange, inclusive); colCards = new BigInteger[sortSpecs.length]; - int [] maxLens = new int[sortSpecs.length]; - for (int i = 0; i < sortSpecs.length; i++) { - maxLens[i] = Integer.MIN_VALUE; - } - - for (int i = 0; i < sortSpecs.length; i++) { - if (sortSpecs[i].getSortKey().getDataType().getType() == TajoDataTypes.Type.TEXT) { - maxLens[i] = Math.max(maxLens[i], totalRange.getStart().get(i).asByteArray().length); - } - } + normalize(sortSpecs, this.mergedRange); for (int i = 0; i < sortSpecs.length; i++) { colCards[i] = computeCardinality(sortSpecs[i].getSortKey().getDataType(), totalRange.getStart().get(i), @@ -116,21 +105,62 @@ public TupleRange[] partition(int partNum) { BigInteger term = x.divide(BigDecimal.valueOf(partNum), RoundingMode.CEILING).toBigInteger(); BigInteger reminder = reverseCardsForDigit[0]; - Tuple last = range.getStart(); + Tuple last = mergedRange.getStart(); + TupleRange tupleRange; while(reminder.compareTo(BigInteger.ZERO) > 0) { if (reminder.compareTo(term) <= 0) { // final one is inclusive - ranges.add(new TupleRange(sortSpecs, last, range.getEnd())); + tupleRange = new TupleRange(sortSpecs, last, mergedRange.getEnd()); } else { Tuple next = increment(last, term, variableId); - ranges.add(new TupleRange(sortSpecs, last, next)); + tupleRange = new TupleRange(sortSpecs, last, next); } + + ranges.add(tupleRange); last = ranges.get(ranges.size() - 1).getEnd(); reminder = reminder.subtract(term); } + for (TupleRange r : ranges) { + denormalize(sortSpecs, r); + } + return ranges.toArray(new TupleRange[ranges.size()]); } + public static void normalize(final SortSpec [] sortSpecs, TupleRange range) { + // normalize text fields to have same bytes length + for (int i = 
0; i < sortSpecs.length; i++) { + if (sortSpecs[i].getSortKey().getDataType().getType() == TajoDataTypes.Type.TEXT) { + byte [] startBytes; + byte [] endBytes; + if (range.getStart().isNull(i)) { + startBytes = BigInteger.ZERO.toByteArray(); + } else { + startBytes = range.getStart().getBytes(i); + } + + if (range.getEnd().isNull(i)) { + endBytes = BigInteger.ZERO.toByteArray(); + } else { + endBytes = range.getEnd().getBytes(i); + } + + byte [][] padded = BytesUtils.padBytes(startBytes, endBytes); + range.getStart().put(i, DatumFactory.createText(padded[0])); + range.getEnd().put(i, DatumFactory.createText(padded[1])); + } + } + } + + public static void denormalize(SortSpec [] sortSpecs, TupleRange range) { + for (int i = 0; i < sortSpecs.length; i++) { + if (sortSpecs[i].getSortKey().getDataType().getType() == TajoDataTypes.Type.TEXT) { + range.getStart().put(i,DatumFactory.createText(BytesUtils.trimBytes(range.getStart().getBytes(i)))); + range.getEnd().put(i,DatumFactory.createText(BytesUtils.trimBytes(range.getEnd().getBytes(i)))); + } + } + } + /** * Check whether an overflow occurs or not. 
* @@ -150,38 +180,38 @@ public boolean isOverflow(int colId, Datum last, BigInteger inc, SortSpec [] sor case BIT: { if (sortSpecs[colId].isAscending()) { candidate = incDecimal.add(new BigDecimal(last.asByte())); - return new BigDecimal(range.getEnd().get(colId).asByte()).compareTo(candidate) < 0; + return new BigDecimal(mergedRange.getEnd().get(colId).asByte()).compareTo(candidate) < 0; } else { candidate = new BigDecimal(last.asByte()).subtract(incDecimal); - return candidate.compareTo(new BigDecimal(range.getEnd().get(colId).asByte())) < 0; + return candidate.compareTo(new BigDecimal(mergedRange.getEnd().get(colId).asByte())) < 0; } } case CHAR: { if (sortSpecs[colId].isAscending()) { candidate = incDecimal.add(new BigDecimal((int)last.asChar())); - return new BigDecimal((int)range.getEnd().get(colId).asChar()).compareTo(candidate) < 0; + return new BigDecimal((int) mergedRange.getEnd().get(colId).asChar()).compareTo(candidate) < 0; } else { candidate = new BigDecimal((int)last.asChar()).subtract(incDecimal); - return candidate.compareTo(new BigDecimal((int)range.getEnd().get(colId).asChar())) < 0; + return candidate.compareTo(new BigDecimal((int) mergedRange.getEnd().get(colId).asChar())) < 0; } } case INT2: { if (sortSpecs[colId].isAscending()) { candidate = incDecimal.add(new BigDecimal(last.asInt2())); - return new BigDecimal(range.getEnd().get(colId).asInt2()).compareTo(candidate) < 0; + return new BigDecimal(mergedRange.getEnd().get(colId).asInt2()).compareTo(candidate) < 0; } else { candidate = new BigDecimal(last.asInt2()).subtract(incDecimal); - return candidate.compareTo(new BigDecimal(range.getEnd().get(colId).asInt2())) < 0; + return candidate.compareTo(new BigDecimal(mergedRange.getEnd().get(colId).asInt2())) < 0; } } case DATE: case INT4: { if (sortSpecs[colId].isAscending()) { candidate = incDecimal.add(new BigDecimal(last.asInt4())); - return new BigDecimal(range.getEnd().get(colId).asInt4()).compareTo(candidate) < 0; + return new 
BigDecimal(mergedRange.getEnd().get(colId).asInt4()).compareTo(candidate) < 0; } else { candidate = new BigDecimal(last.asInt4()).subtract(incDecimal); - return candidate.compareTo(new BigDecimal(range.getEnd().get(colId).asInt4())) < 0; + return candidate.compareTo(new BigDecimal(mergedRange.getEnd().get(colId).asInt4())) < 0; } } case TIME: @@ -189,48 +219,43 @@ public boolean isOverflow(int colId, Datum last, BigInteger inc, SortSpec [] sor case INT8: { if (sortSpecs[colId].isAscending()) { candidate = incDecimal.add(new BigDecimal(last.asInt8())); - return new BigDecimal(range.getEnd().get(colId).asInt8()).compareTo(candidate) < 0; + return new BigDecimal(mergedRange.getEnd().get(colId).asInt8()).compareTo(candidate) < 0; } else { candidate = new BigDecimal(last.asInt8()).subtract(incDecimal); - return candidate.compareTo(new BigDecimal(range.getEnd().get(colId).asInt8())) < 0; + return candidate.compareTo(new BigDecimal(mergedRange.getEnd().get(colId).asInt8())) < 0; } } case FLOAT4: { if (sortSpecs[colId].isAscending()) { candidate = incDecimal.add(new BigDecimal(last.asFloat4())); - return new BigDecimal(range.getEnd().get(colId).asFloat4()).compareTo(candidate) < 0; + return new BigDecimal(mergedRange.getEnd().get(colId).asFloat4()).compareTo(candidate) < 0; } else { candidate = new BigDecimal(last.asFloat4()).subtract(incDecimal); - return candidate.compareTo(new BigDecimal(range.getEnd().get(colId).asFloat4())) < 0; + return candidate.compareTo(new BigDecimal(mergedRange.getEnd().get(colId).asFloat4())) < 0; } } case FLOAT8: { if (sortSpecs[colId].isAscending()) { candidate = incDecimal.add(new BigDecimal(last.asFloat8())); - return new BigDecimal(range.getEnd().get(colId).asFloat8()).compareTo(candidate) < 0; + return new BigDecimal(mergedRange.getEnd().get(colId).asFloat8()).compareTo(candidate) < 0; } else { candidate = new BigDecimal(last.asFloat8()).subtract(incDecimal); - return candidate.compareTo(new 
BigDecimal(range.getEnd().get(colId).asFloat8())) < 0; + return candidate.compareTo(new BigDecimal(mergedRange.getEnd().get(colId).asFloat8())) < 0; } } case TEXT: { - byte [] lastBytesPadded; - byte [] endBytesPadded; - byte [] lastBytes = last.asByteArray(); - byte [] endBytes = range.getEnd().getBytes(colId); + byte [] endBytes = mergedRange.getEnd().getBytes(colId); - byte [][] padded = BytesUtils.padBytes(lastBytes, endBytes); - lastBytesPadded = padded[0]; - endBytesPadded = padded[1]; + Preconditions.checkState(lastBytes.length == endBytes.length); if (sortSpecs[colId].isAscending()) { - candidate = incDecimal.add(new BigDecimal(new BigInteger(lastBytesPadded))); - return new BigDecimal(new BigInteger(endBytesPadded)).compareTo(candidate) < 0; + candidate = incDecimal.add(new BigDecimal(new BigInteger(lastBytes))); + return new BigDecimal(new BigInteger(endBytes)).compareTo(candidate) < 0; } else { - candidate = new BigDecimal(new BigInteger(lastBytesPadded)).subtract(incDecimal); - return candidate.compareTo(new BigDecimal(new BigInteger(endBytesPadded))) < 0; + candidate = new BigDecimal(new BigInteger(lastBytes)).subtract(incDecimal); + return candidate.compareTo(new BigDecimal(new BigInteger(endBytes))) < 0; } } case INET4: { @@ -242,14 +267,14 @@ public boolean isOverflow(int colId, Datum last, BigInteger inc, SortSpec [] sor return true; } Bytes.putInt(candidateBytesVal, 0, candidateIntVal); - return Bytes.compareTo(range.getEnd().get(colId).asByteArray(), candidateBytesVal) < 0; + return Bytes.compareTo(mergedRange.getEnd().get(colId).asByteArray(), candidateBytesVal) < 0; } else { candidateIntVal = last.asInt4() - incDecimal.intValue(); if (candidateIntVal + incDecimal.intValue() != last.asInt4()) { return true; } Bytes.putInt(candidateBytesVal, 0, candidateIntVal); - return Bytes.compareTo(candidateBytesVal, range.getEnd().get(colId).asByteArray()) < 0; + return Bytes.compareTo(candidateBytesVal, mergedRange.getEnd().get(colId).asByteArray()) < 0; } 
} } @@ -262,20 +287,20 @@ public long incrementAndGetReminder(int colId, Datum last, long inc) { switch (column.getDataType().getType()) { case BIT: { long candidate = last.asByte() + inc; - byte end = range.getEnd().get(colId).asByte(); + byte end = mergedRange.getEnd().get(colId).asByte(); reminder = candidate - end; break; } case CHAR: { long candidate = last.asChar() + inc; - char end = range.getEnd().get(colId).asChar(); + char end = mergedRange.getEnd().get(colId).asChar(); reminder = candidate - end; break; } case DATE: case INT4: { int candidate = (int) (last.asInt4() + inc); - int end = range.getEnd().get(colId).asInt4(); + int end = mergedRange.getEnd().get(colId).asInt4(); reminder = candidate - end; break; } @@ -284,29 +309,30 @@ public long incrementAndGetReminder(int colId, Datum last, long inc) { case INT8: case INET4: { long candidate = last.asInt8() + inc; - long end = range.getEnd().get(colId).asInt8(); + long end = mergedRange.getEnd().get(colId).asInt8(); reminder = candidate - end; break; } case FLOAT4: { float candidate = last.asFloat4() + inc; - float end = range.getEnd().get(colId).asFloat4(); + float end = mergedRange.getEnd().get(colId).asFloat4(); reminder = (long) (candidate - end); break; } case FLOAT8: { double candidate = last.asFloat8() + inc; - double end = range.getEnd().get(colId).asFloat8(); + double end = mergedRange.getEnd().get(colId).asFloat8(); reminder = (long) Math.ceil(candidate - end); break; } case TEXT: { byte [] lastBytes = last.asByteArray(); - byte [] endBytes = range.getEnd().get(colId).asByteArray(); + byte [] endBytes = mergedRange.getEnd().get(colId).asByteArray(); + + Preconditions.checkState(lastBytes.length == endBytes.length); - byte [][] padded = BytesUtils.padBytes(lastBytes, endBytes); - BigInteger lastBInt = new BigInteger(padded[0]); - BigInteger endBInt = new BigInteger(padded[1]); + BigInteger lastBInt = new BigInteger(lastBytes); + BigInteger endBInt = new BigInteger(endBytes); BigInteger incBInt = 
BigInteger.valueOf(inc); BigInteger candidate = lastBInt.add(incBInt); @@ -350,7 +376,7 @@ public Tuple increment(final Tuple last, BigInteger interval, final int baseDigi for (int i = finalId; i >= 0; i--) { if (isOverflow(i, last.get(i), incs[i], sortSpecs)) { if (i == 0) { - throw new RangeOverflowException(range, last, incs[i].longValue()); + throw new RangeOverflowException(mergedRange, last, incs[i].longValue(), sortSpecs[i].isAscending()); } long rem = incrementAndGetReminder(i, last.get(i), value.longValue()); incs[i] = BigInteger.valueOf(rem); @@ -377,7 +403,7 @@ public Tuple increment(final Tuple last, BigInteger interval, final int baseDigi switch (column.getDataType().getType()) { case CHAR: if (overflowFlag[i]) { - end.put(i, DatumFactory.createChar((char) (range.getStart().get(i).asChar() + incs[i].longValue()))); + end.put(i, DatumFactory.createChar((char) (mergedRange.getStart().get(i).asChar() + incs[i].longValue()))); } else { end.put(i, DatumFactory.createChar((char) (last.get(i).asChar() + incs[i].longValue()))); } @@ -385,7 +411,7 @@ public Tuple increment(final Tuple last, BigInteger interval, final int baseDigi case BIT: if (overflowFlag[i]) { end.put(i, DatumFactory.createBit( - (byte) (range.getStart().get(i).asByte() + incs[i].longValue()))); + (byte) (mergedRange.getStart().get(i).asByte() + incs[i].longValue()))); } else { end.put(i, DatumFactory.createBit((byte) (last.get(i).asByte() + incs[i].longValue()))); } @@ -393,7 +419,7 @@ public Tuple increment(final Tuple last, BigInteger interval, final int baseDigi case INT2: if (overflowFlag[i]) { end.put(i, DatumFactory.createInt2( - (short) (range.getStart().get(i).asInt2() + incs[i].longValue()))); + (short) (mergedRange.getStart().get(i).asInt2() + incs[i].longValue()))); } else { end.put(i, DatumFactory.createInt2((short) (last.get(i).asInt2() + incs[i].longValue()))); } @@ -401,7 +427,7 @@ public Tuple increment(final Tuple last, BigInteger interval, final int baseDigi case INT4: if 
(overflowFlag[i]) { end.put(i, DatumFactory.createInt4( - (int) (range.getStart().get(i).asInt4() + incs[i].longValue()))); + (int) (mergedRange.getStart().get(i).asInt4() + incs[i].longValue()))); } else { if (sortSpecs[i].isAscending()) { end.put(i, DatumFactory.createInt4((int) (last.get(i).asInt4() + incs[i].longValue()))); @@ -413,7 +439,7 @@ public Tuple increment(final Tuple last, BigInteger interval, final int baseDigi case INT8: if (overflowFlag[i]) { end.put(i, DatumFactory.createInt8( - range.getStart().get(i).asInt8() + incs[i].longValue())); + mergedRange.getStart().get(i).asInt8() + incs[i].longValue())); } else { end.put(i, DatumFactory.createInt8(last.get(i).asInt8() + incs[i].longValue())); } @@ -421,7 +447,7 @@ public Tuple increment(final Tuple last, BigInteger interval, final int baseDigi case FLOAT4: if (overflowFlag[i]) { end.put(i, DatumFactory.createFloat4( - range.getStart().get(i).asFloat4() + incs[i].longValue())); + mergedRange.getStart().get(i).asFloat4() + incs[i].longValue())); } else { end.put(i, DatumFactory.createFloat4(last.get(i).asFloat4() + incs[i].longValue())); } @@ -429,31 +455,35 @@ public Tuple increment(final Tuple last, BigInteger interval, final int baseDigi case FLOAT8: if (overflowFlag[i]) { end.put(i, DatumFactory.createFloat8( - range.getStart().get(i).asFloat8() + incs[i].longValue())); + mergedRange.getStart().get(i).asFloat8() + incs[i].longValue())); } else { end.put(i, DatumFactory.createFloat8(last.get(i).asFloat8() + incs[i].longValue())); } break; case TEXT: if (overflowFlag[i]) { - end.put(i, DatumFactory.createText(((char) (range.getStart().get(i).asChars().charAt(0) + end.put(i, DatumFactory.createText(((char) (mergedRange.getStart().get(i).asChars().charAt(0) + incs[i].longValue())) + "")); } else { - BigInteger lastBigInt = UnsignedLong.valueOf(new BigInteger(last.get(i).asByteArray())).bigIntegerValue(); - BigInteger incBigInt = interval; - end.put(i, 
DatumFactory.createText(lastBigInt.add(incBigInt).toByteArray())); + BigInteger lastBigInt; + if (last.isNull(i)) { + lastBigInt = BigInteger.valueOf(0); + } else { + lastBigInt = UnsignedLong.valueOf(new BigInteger(last.get(i).asByteArray())).bigIntegerValue(); + } + end.put(i, DatumFactory.createText(lastBigInt.add(incs[i]).toByteArray())); } break; case DATE: if (overflowFlag[i]) { - end.put(i, DatumFactory.createDate((int) (range.getStart().get(i).asInt4() + incs[i].longValue()))); + end.put(i, DatumFactory.createDate((int) (mergedRange.getStart().get(i).asInt4() + incs[i].longValue()))); } else { end.put(i, DatumFactory.createDate((int) (last.get(i).asInt4() + incs[i].longValue()))); } break; case TIME: if (overflowFlag[i]) { - end.put(i, DatumFactory.createTime(range.getStart().get(i).asInt8() + incs[i].longValue())); + end.put(i, DatumFactory.createTime(mergedRange.getStart().get(i).asInt8() + incs[i].longValue())); } else { end.put(i, DatumFactory.createTime(last.get(i).asInt8() + incs[i].longValue())); } @@ -461,7 +491,7 @@ public Tuple increment(final Tuple last, BigInteger interval, final int baseDigi case TIMESTAMP: if (overflowFlag[i]) { end.put(i, DatumFactory.createTimestmpDatumWithJavaMillis( - range.getStart().get(i).asInt8() + incs[i].longValue())); + mergedRange.getStart().get(i).asInt8() + incs[i].longValue())); } else { end.put(i, DatumFactory.createTimestmpDatumWithJavaMillis(last.get(i).asInt8() + incs[i].longValue())); } @@ -469,7 +499,7 @@ public Tuple increment(final Tuple last, BigInteger interval, final int baseDigi case INET4: byte[] ipBytes; if (overflowFlag[i]) { - ipBytes = range.getStart().get(i).asByteArray(); + ipBytes = mergedRange.getStart().get(i).asByteArray(); assert ipBytes.length == 4; end.put(i, DatumFactory.createInet4(ipBytes)); } else { diff --git a/tajo-core/src/test/java/org/apache/tajo/engine/planner/TestUniformRangePartition.java b/tajo-core/src/test/java/org/apache/tajo/engine/planner/TestUniformRangePartition.java 
index 3199c57f4f..fc038ab59d 100644 --- a/tajo-core/src/test/java/org/apache/tajo/engine/planner/TestUniformRangePartition.java +++ b/tajo-core/src/test/java/org/apache/tajo/engine/planner/TestUniformRangePartition.java @@ -426,6 +426,39 @@ public void testPartitionForMultipleChars2() { assertTrue(ranges[partNum - 1].getEnd().equals(e)); } + @Test + public void testPartitionForMultipleChars2Desc() { + Schema schema = new Schema() + .addColumn("KEY1", Type.TEXT); + + SortSpec [] sortSpecs = PlannerUtil.schemaToSortSpecs(schema); + sortSpecs[0].setDescOrder(); + + Tuple s = new VTuple(1); + s.put(0, DatumFactory.createText("A999975")); + Tuple e = new VTuple(1); + e.put(0, DatumFactory.createText("A1")); + + final int partNum = 48; + + TupleRange expected = new TupleRange(sortSpecs, s, e); + RangePartitionAlgorithm partitioner = + new UniformRangePartition(expected, sortSpecs, true); + TupleRange [] ranges = partitioner.partition(partNum); + + TupleRange prev = null; + for (TupleRange r : ranges) { + if (prev == null) { + prev = r; + } else { + assertTrue(prev.compareTo(r) > 0); + } + } + assertEquals(partNum, ranges.length); + assertTrue(ranges[0].getStart().equals(s)); + assertTrue(ranges[partNum - 1].getEnd().equals(e)); + } + @Test public void testPartitionForMultipleCharsWithSameFirstChar() { Schema schema = new Schema() @@ -505,7 +538,7 @@ public void testPartitionWithNull() { if (prev == null) { prev = r; } else { - assertTrue(prev.compareTo(r) > 0); + assertTrue(prev.compareTo(r) < 0); } } }