From 0b9bd167440b5e872f7ef02bae366d24e30e475d Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Thu, 5 Nov 2015 16:43:47 +0900 Subject: [PATCH 1/5] Add a document and fixed a wrong configuration name --- .../java/org/apache/tajo/SessionVars.java | 4 +- .../java/org/apache/tajo/conf/TajoConf.java | 2 +- .../engine/planner/physical/TestSortExec.java | 2 +- .../tajo/engine/query/TestSortQuery.java | 2 +- .../planner/physical/ExternalSortExec.java | 2 +- .../sphinx/configuration/tajo-site-xml.rst | 450 +++++++++++++++++- tajo-docs/src/main/sphinx/time_zone.rst | 2 +- 7 files changed, 448 insertions(+), 16 deletions(-) diff --git a/tajo-common/src/main/java/org/apache/tajo/SessionVars.java b/tajo-common/src/main/java/org/apache/tajo/SessionVars.java index 2834e1448e..20fb793a43 100644 --- a/tajo-common/src/main/java/org/apache/tajo/SessionVars.java +++ b/tajo-common/src/main/java/org/apache/tajo/SessionVars.java @@ -131,7 +131,7 @@ public enum SessionVars implements ConfigKey { NULL_CHAR(ConfVars.$TEXT_NULL, "null char of text file output", DEFAULT), CODEGEN(ConfVars.$CODEGEN, "Runtime code generation enabled (experiment)", DEFAULT), AGG_HASH_TABLE_SIZE(ConfVars.$AGG_HASH_TABLE_SIZE, "Aggregation hash table size", DEFAULT), - SORT_HASH_TABLE_SIZE(ConfVars.$SORT_HASH_TABLE_SIZE, "Sort hash table size", DEFAULT), + SORT_LIST_SIZE(ConfVars.$SORT_LIST_SIZE, "List size for in-memory sort ", DEFAULT), JOIN_HASH_TABLE_SIZE(ConfVars.$JOIN_HASH_TABLE_SIZE, "Join hash table size", DEFAULT), // for index @@ -140,7 +140,7 @@ public enum SessionVars implements ConfigKey { // for partition overwrite PARTITION_NO_RESULT_OVERWRITE_ENABLED(ConfVars.$PARTITION_NO_RESULT_OVERWRITE_ENABLED, - "If True, a partitioned table is overwritten even if a sub query leads to no result. " + "If true, a partitioned table is overwritten even if a sub query leads to no result. 
" + "Otherwise, the table data will be kept if there is no result", DEFAULT), // Behavior Control --------------------------------------------------------- diff --git a/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java b/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java index e84da0f67e..de52e53145 100644 --- a/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java +++ b/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java @@ -345,7 +345,7 @@ public static enum ConfVars implements ConfigKey { $MAX_OUTPUT_FILE_SIZE("tajo.query.max-outfile-size-mb", 0), // zero means infinite $CODEGEN("tajo.executor.codegen.enabled", false), // Runtime code generation (todo this is broken) $AGG_HASH_TABLE_SIZE("tajo.executor.aggregate.hash-table.size", 10000), - $SORT_HASH_TABLE_SIZE("tajo.executor.sort.hash-table.size", 100000), + $SORT_LIST_SIZE("tajo.executor.sort.list.size", 100000), $JOIN_HASH_TABLE_SIZE("tajo.executor.join.hash-table.size", 100000), // for index diff --git a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestSortExec.java b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestSortExec.java index 15708b1f17..30a63bef5c 100644 --- a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestSortExec.java +++ b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestSortExec.java @@ -75,7 +75,7 @@ public class TestSortExec { public static void setUp() throws Exception { conf = new TajoConf(); conf.setBoolVar(TajoConf.ConfVars.$TEST_MODE, true); - conf.setIntVar(ConfVars.$SORT_HASH_TABLE_SIZE, 100); + conf.setIntVar(ConfVars.$SORT_LIST_SIZE, 100); util = TpchTestBase.getInstance().getTestingCluster(); catalog = util.getMaster().getCatalog(); workDir = CommonTestingUtil.getTestDir(TEST_PATH); diff --git a/tajo-core-tests/src/test/java/org/apache/tajo/engine/query/TestSortQuery.java 
b/tajo-core-tests/src/test/java/org/apache/tajo/engine/query/TestSortQuery.java index 6022672dc8..ce76ecbba6 100644 --- a/tajo-core-tests/src/test/java/org/apache/tajo/engine/query/TestSortQuery.java +++ b/tajo-core-tests/src/test/java/org/apache/tajo/engine/query/TestSortQuery.java @@ -40,7 +40,7 @@ public TestSortQuery() { super(TajoConstants.DEFAULT_DATABASE_NAME); Map variables = new HashMap<>(); - variables.put(SessionVars.SORT_HASH_TABLE_SIZE.keyname(), "100"); + variables.put(SessionVars.SORT_LIST_SIZE.keyname(), "100"); client.updateSessionVariables(variables); } diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/ExternalSortExec.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/ExternalSortExec.java index 6ed22c81f6..b9ab3446c5 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/ExternalSortExec.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/ExternalSortExec.java @@ -118,7 +118,7 @@ private ExternalSortExec(final TaskAttemptContext context, final SortNode plan) this.sortBufferBytesNum = context.getQueryContext().getLong(SessionVars.EXTSORT_BUFFER_SIZE) * StorageUnit.MB; this.allocatedCoreNum = context.getConf().getIntVar(ConfVars.EXECUTOR_EXTERNAL_SORT_THREAD_NUM); this.executorService = Executors.newFixedThreadPool(this.allocatedCoreNum); - this.inMemoryTable = new TupleList(context.getQueryContext().getInt(SessionVars.SORT_HASH_TABLE_SIZE)); + this.inMemoryTable = new TupleList(context.getQueryContext().getInt(SessionVars.SORT_LIST_SIZE)); this.sortTmpDir = getExecutorTmpDir(); localDirAllocator = new LocalDirAllocator(ConfVars.WORKER_TEMPORAL_DIR.varname); diff --git a/tajo-docs/src/main/sphinx/configuration/tajo-site-xml.rst b/tajo-docs/src/main/sphinx/configuration/tajo-site-xml.rst index d0c359715c..4269164405 100644 --- a/tajo-docs/src/main/sphinx/configuration/tajo-site-xml.rst +++ b/tajo-docs/src/main/sphinx/configuration/tajo-site-xml.rst @@ -2,23 
+2,455 @@ The tajo-site.xml File ********************** -To the ``core-site.xml`` file on every host in your cluster, you must add the following information: +You can add more configurations in the ``tajo-site.xml`` file. Note that you should replicate this file to all hosts in your cluster once you have edited it. +If you are looking for the configurations for the master and the worker, please refer to :doc:`tajo_master_configuration` and :doc:`worker_configuration`. +Also, catalog configurations can be found in :doc:`catalog_configuration`. + +========================= +Join Query Settings +========================= + +"""""""""""""""""""""""""""""""""""""" +`tajo.dist-query.join.auto-broadcast` +"""""""""""""""""""""""""""""""""""""" + +A flag to enable or disable the use of broadcast join. + + * Property value: Boolean + * Default value: true + * Example + +.. code-block:: xml + + + tajo.dist-query.join.auto-broadcast + true + + +""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +`tajo.dist-query.broadcast.non-cross-join.threshold-kb` +""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" + +A threshold for non-cross joins. When a non-cross join query is executed with the broadcast join, the whole size of broadcasted tables won't exceed this threshold. + + * Property value: Integer + * Unit: KB + * Default value: 5120 + * Example + +.. code-block:: xml + + + tajo.dist-query.broadcast.non-cross-join.threshold-kb + 5120 + + +""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +`tajo.dist-query.broadcast.cross-join.threshold-kb` +""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" + +A threshold for cross joins. When a cross join query is executed, the whole size of broadcasted tables won't exceed this threshold. + + * Property value: Integer + * Unit: KB + * Default value: 1024 + * Example + +.. code-block:: xml + + + tajo.dist-query.broadcast.cross-join.threshold-kb + 1024 + + +.. 
warning:: + In Tajo, the broadcast join is the only way to perform cross joins. Since the cross join is a very expensive operation, this value needs to be tuned carefully. + +"""""""""""""""""""""""""""""""""""""" +`tajo.dist-query.join.task-volume-mb` +"""""""""""""""""""""""""""""""""""""" + +The repartition join is executed in two stages. When a join query is executed with the repartition join, this value indicates the amount of input data processed by each task at the second stage. +As a result, it determines the degree of the parallel processing of the join query. + + * Property value: Integer + * Unit: MB + * Default value: 64 + * Example + +.. code-block:: xml + + + tajo.dist-query.join.task-volume-mb + 64 + + +""""""""""""""""""""""""""""""""""""""""""" +`tajo.dist-query.join.partition-volume-mb` +""""""""""""""""""""""""""""""""""""""""""" + +The repartition join is executed in two stages. When a join query is executed with the repartition join, +this value indicates the output size of each task at the first stage, which determines the number of partitions to be shuffled between two stages. + + * Property value: Integer + * Unit: MB + * Default value: 128 + * Example + +.. code-block:: xml + + + tajo.dist-query.join.partition-volume-mb + 128 + + +"""""""""""""""""""""""""""""""""""""""""""""""""""""""" +`tajo.executor.join.common.in-memory-hash-threshold-mb` +"""""""""""""""""""""""""""""""""""""""""""""""""""""""" + +This value provides the criterion to decide the algorithm to perform a join in a task. +If the input data is smaller than this value, join is performed with the in-memory hash join. +Otherwise, the sort-merge join is used. + + * Property value: Integer + * Unit: MB + * Default value: 64 + * Example + +.. code-block:: xml + + + tajo.executor.join.common.in-memory-hash-threshold-mb + 64 + + +.. warning:: + This value is the size of the input stored on file systems. 
So, when the input data is loaded into the JVM heap, + its actual size is usually much larger than the configured value, which means that too large a threshold can cause unexpected OutOfMemory errors. + This value should be tuned carefully. + +"""""""""""""""""""""""""""""""""""""""""""""""""""""""" +`tajo.executor.join.inner.in-memory-hash-threshold-mb` +"""""""""""""""""""""""""""""""""""""""""""""""""""""""" + +This value provides the criterion to decide the algorithm to perform an inner join in a task. +If the input data is smaller than this value, the inner join is performed with the in-memory hash join. +Otherwise, the sort-merge join is used. + + * Property value: Integer + * Unit: MB + * Default value: 64 + * Example + +.. code-block:: xml + + + tajo.executor.join.inner.in-memory-hash-threshold-mb + 64 + + +.. warning:: + This value is the size of the input stored on file systems. So, when the input data is loaded into the JVM heap, + its actual size is usually much larger than the configured value, which means that too large a threshold can cause unexpected OutOfMemory errors. + This value should be tuned carefully. + +"""""""""""""""""""""""""""""""""""""""""""""""""""""""" +`tajo.executor.join.outer.in-memory-hash-threshold-mb` +"""""""""""""""""""""""""""""""""""""""""""""""""""""""" + +This value provides the criterion to decide the algorithm to perform an outer join in a task. +If the input data is smaller than this value, the outer join is performed with the in-memory hash join. +Otherwise, the sort-merge join is used. + + * Property value: Integer + * Unit: MB + * Default value: 64 + * Example + +.. code-block:: xml + + + tajo.executor.join.outer.in-memory-hash-threshold-mb + 64 + + +.. warning:: + This value is the size of the input stored on file systems. So, when the input data is loaded into the JVM heap, + its actual size is usually much larger than the configured value, which means that too large a threshold can cause unexpected OutOfMemory errors. 
+ This value should be tuned carefully. + +""""""""""""""""""""""""""""""""""""" +`tajo.executor.join.hash-table.size` +""""""""""""""""""""""""""""""""""""" + +The initial size of hash table for in-memory hash join. + + * Property value: Integer + * Default value: 100000 + * Example + +.. code-block:: xml + + + tajo.executor.join.hash-table.size + 100000 + ====================== -System Config +Sort Query Settings ====================== +"""""""""""""""""""""""""""""""""""""" +`tajo.dist-query.sort.task-volume-mb` +"""""""""""""""""""""""""""""""""""""" + +The sort operation is executed in two stages. When a sort query is executed, this value indicates the amount of input data processed by each task at the second stage. +As a result, it determines the degree of the parallel processing of the sort query. + + * Property value: Integer + * Unit: MB + * Default value: 64 + * Example + +.. code-block:: xml + + + tajo.dist-query.sort.task-volume-mb + 64 + + +"""""""""""""""""""""""""""""""""""""""" +`tajo.executor.external-sort.buffer-mb` +"""""""""""""""""""""""""""""""""""""""" + +A threshold to choose the sort algorithm. If the input data is larger than this threshold, the external sort algorithm is used. + + * Property value: Integer + * Unit: MB + * Default value: 200 + * Example + +.. code-block:: xml + + + tajo.executor.external-sort.buffer-mb + 200 + + +"""""""""""""""""""""""""""""""""""""" +`tajo.executor.sort.list.size` +"""""""""""""""""""""""""""""""""""""" +The initial size of list for in-memory sort. + + * Property value: Integer + * Default value: 100000 + * Example + +.. code-block:: xml + + + tajo.executor.sort.list.size + 100000 + + +========================= +Group by Query Settings +========================= + +"""""""""""""""""""""""""""""""""""""""""""" +`tajo.dist-query.groupby.multi-level-aggr` +"""""""""""""""""""""""""""""""""""""""""""" + +A flag to enable the multi-level algorithm for distinct aggregation. 
If this value is set, 3-phase aggregation algorithm is used. +Otherwise, 2-phase aggregation algorithm is used. + + * Property value: Boolean + * Default value: true + * Example + +.. code-block:: xml + + + tajo.dist-query.groupby.multi-level-aggr + true + + +"""""""""""""""""""""""""""""""""""""""""""""" +`tajo.dist-query.groupby.partition-volume-mb` +"""""""""""""""""""""""""""""""""""""""""""""" + +The aggregation is executed in two stages. When an aggregation query is executed, +this value indicates the output size of each task at the first stage, which determines the number of partitions to be shuffled between two stages. + + * Property value: Integer + * Unit: MB + * Default value: 256 + * Example + +.. code-block:: xml + + + tajo.dist-query.groupby.partition-volume-mb + 256 + + +"""""""""""""""""""""""""""""""""""""""""""""" +`tajo.dist-query.groupby.task-volume-mb` +"""""""""""""""""""""""""""""""""""""""""""""" + +The aggregation operation is executed in two stages. When an aggregation query is executed, this value indicates the amount of input data processed by each task at the second stage. +As a result, it determines the degree of the parallel processing of the aggregation query. + + * Property value: Integer + * Unit: MB + * Default value: 64 + * Example + +.. code-block:: xml + + + tajo.dist-query.groupby.partition-volume-mb + 64 + + +"""""""""""""""""""""""""""""""""""""""""""""""""""""""" +`tajo.executor.groupby.in-memory-hash-threshold-mb` +"""""""""""""""""""""""""""""""""""""""""""""""""""""""" + +This value provides the criterion to decide the algorithm to perform an aggregation in a task. +If the input data is smaller than this value, the aggregation is performed with the in-memory hash aggregation. +Otherwise, the sort-based aggregation is used. + + * Property value: Integer + * Unit: MB + * Default value: 64 + * Example + +.. code-block:: xml + + + tajo.executor.groupby.in-memory-hash-threshold-mb + 64 + + +.. 
warning:: + This value is the size of the input stored on file systems. So, when the input data is loaded into the JVM heap, + its actual size is usually much larger than the configured value, which means that too large a threshold can cause unexpected OutOfMemory errors. + This value should be tuned carefully. + +"""""""""""""""""""""""""""""""""""""""""" +`tajo.executor.aggregate.hash-table.size` +"""""""""""""""""""""""""""""""""""""""""" + +The initial size of list for in-memory sort. + + * Property value: Integer + * Default value: 10000 + * Example + +.. code-block:: xml + + + tajo.executor.aggregate.hash-table.size + 10000 + ====================== Date/Time Settings ====================== -+--------------------------+----------------+--------------------------------------------------------+ -| Property Name | Property Value | Descriptions | -+==========================+================+========================================================+ -| tajo.timezone | Time zone id | Refer to :doc:`/time_zone` | -+--------------------------+----------------+--------------------------------------------------------+ -| tajo.datetime.date-order | Date order | Determine date order. It should be one of YMD, DMY, MDY| -+--------------------------+----------------+--------------------------------------------------------+ +""""""""""""""""""" +`tajo.timezone` +""""""""""""""""""" + +Refer to :doc:`/time_zone`. + + * Property value: Time zone id + * Default value: Default time zone of JVM + * Example + +.. code-block:: xml + + + tajo.timezone + GMT+9 + + +""""""""""""""""""""""""""" +`tajo.datetime.date-order` +""""""""""""""""""""""""""" + +Date order specification. + + * Property value: One of YMD, DMY, MDY. + * Default value: YMD + * Example + +.. 
code-block:: xml + + + tajo.datetime.date-order + YMD + + +====================== +Table partitions +====================== + +"""""""""""""""""""""""""""""""""""""""""""""""""""" +`tajo.partition.overwrite.even-if-no-result` +"""""""""""""""""""""""""""""""""""""""""""""""""""" + +If this value is true, a partitioned table is overwritten even if a subquery leads to no result. Otherwise, the table data will be kept if there is no result. + + * Property value: Boolean + * Default value: false + * Example + +.. code-block:: xml + + + tajo.partition.overwrite.even-if-no-result + false + + +"""""""""""""""""""""""""""""""""""""""""""""""""""" +`tajo.dist-query.table-partition.task-volume-mb` +"""""""""""""""""""""""""""""""""""""""""""""""""""" + +In Tajo, storing a partition table is executed in two stages. +This value indicates the output size of a task of the former stage, which determines the number of partitions to be shuffled between two stages. + + * Property value: Integer + * Unit: MB + * Default value: 256 + * Example + +.. code-block:: xml + + + tajo.dist-query.table-partition.task-volume-mb + 256 + + +====================== +Arithmetic Settings +====================== + +"""""""""""""""""""""""""""""""""""""""""""""""""""" +`tajo.behavior.arithmetic-abort` +"""""""""""""""""""""""""""""""""""""""""""""""""""" + +A flag to indicate how to handle the errors caused by invalid arithmetic operations. If true, a running query will be terminated with an overflow or a divide-by-zero. + + * Property value: Boolean + * Default value: false + * Example + +.. code-block:: xml + + tajo.behavior.arithmetic-abort + false + \ No newline at end of file diff --git a/tajo-docs/src/main/sphinx/time_zone.rst b/tajo-docs/src/main/sphinx/time_zone.rst index 101012f3ed..9b86e18795 100644 --- a/tajo-docs/src/main/sphinx/time_zone.rst +++ b/tajo-docs/src/main/sphinx/time_zone.rst @@ -19,7 +19,7 @@ You can set the system time zone in *conf/tajo-site.xml* file as follows: .. 
code-block:: xml tajo.timezone - GMT+9 + GMT+9 ================== From da995994cad65ddc90d95bc245a23c5d4e88cd40 Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Fri, 6 Nov 2015 13:23:21 +0900 Subject: [PATCH 2/5] Apply comments --- .../sphinx/configuration/tajo-site-xml.rst | 48 +++++++++---------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/tajo-docs/src/main/sphinx/configuration/tajo-site-xml.rst b/tajo-docs/src/main/sphinx/configuration/tajo-site-xml.rst index 4269164405..cdf03621cc 100644 --- a/tajo-docs/src/main/sphinx/configuration/tajo-site-xml.rst +++ b/tajo-docs/src/main/sphinx/configuration/tajo-site-xml.rst @@ -16,7 +16,7 @@ Join Query Settings A flag to enable or disable the use of broadcast join. - * Property value: Boolean + * Property value type: Boolean * Default value: true * Example @@ -33,7 +33,7 @@ A flag to enable or disable the use of broadcast join. A threshold for non-cross joins. When a non-cross join query is executed with the broadcast join, the whole size of broadcasted tables won't exceed this threshold. - * Property value: Integer + * Property value type: Integer * Unit: KB * Default value: 5120 * Example @@ -51,7 +51,7 @@ A threshold for non-cross joins. When a non-cross join query is executed with th A threshold for cross joins. When a cross join query is executed, the whole size of broadcasted tables won't exceed this threshold. - * Property value: Integer + * Property value type: Integer * Unit: KB * Default value: 1024 * Example @@ -73,7 +73,7 @@ A threshold for cross joins. When a cross join query is executed, the whole size The repartition join is executed in two stages. When a join query is executed with the repartition join, this value indicates the amount of input data processed by each task at the second stage. As a result, it determines the degree of the parallel processing of the join query. 
- * Property value: Integer + * Property value type: Integer * Unit: MB * Default value: 64 * Example @@ -92,7 +92,7 @@ As a result, it determines the degree of the parallel processing of the join que The repartition join is executed in two stages. When a join query is executed with the repartition join, this value indicates the output size of each task at the first stage, which determines the number of partitions to be shuffled between two stages. - * Property value: Integer + * Property value type: Integer * Unit: MB * Default value: 128 * Example @@ -112,7 +112,7 @@ This value provides the criterion to decide the algorithm to perform a join in a If the input data is smaller than this value, join is performed with the in-memory hash join. Otherwise, the sort-merge join is used. - * Property value: Integer + * Property value type: Integer * Unit: MB * Default value: 64 * Example @@ -137,7 +137,7 @@ This value provides the criterion to decide the algorithm to perform an inner jo If the input data is smaller than this value, the inner join is performed with the in-memory hash join. Otherwise, the sort-merge join is used. - * Property value: Integer + * Property value type: Integer * Unit: MB * Default value: 64 * Example @@ -162,7 +162,7 @@ This value provides the criterion to decide the algorithm to perform an outer jo If the input data is smaller than this value, the outer join is performed with the in-memory hash join. Otherwise, the sort-merge join is used. - * Property value: Integer + * Property value type: Integer * Unit: MB * Default value: 64 * Example @@ -185,7 +185,7 @@ Otherwise, the sort-merge join is used. The initial size of hash table for in-memory hash join. - * Property value: Integer + * Property value type: Integer * Default value: 100000 * Example @@ -207,7 +207,7 @@ Sort Query Settings The sort operation is executed in two stages. 
When a sort query is executed, this value indicates the amount of input data processed by each task at the second stage. As a result, it determines the degree of the parallel processing of the sort query. - * Property value: Integer + * Property value type: Integer * Unit: MB * Default value: 64 * Example @@ -225,7 +225,7 @@ As a result, it determines the degree of the parallel processing of the sort que A threshold to choose the sort algorithm. If the input data is larger than this threshold, the external sort algorithm is used. - * Property value: Integer + * Property value type: Integer * Unit: MB * Default value: 200 * Example @@ -243,7 +243,7 @@ A threshold to choose the sort algorithm. If the input data is larger than this The initial size of list for in-memory sort. - * Property value: Integer + * Property value type: Integer * Default value: 100000 * Example @@ -265,7 +265,7 @@ Group by Query Settings A flag to enable the multi-level algorithm for distinct aggregation. If this value is set, 3-phase aggregation algorithm is used. Otherwise, 2-phase aggregation algorithm is used. - * Property value: Boolean + * Property value type: Boolean * Default value: true * Example @@ -283,7 +283,7 @@ Otherwise, 2-phase aggregation algorithm is used. The aggregation is executed in two stages. When an aggregation query is executed, this value indicates the output size of each task at the first stage, which determines the number of partitions to be shuffled between two stages. - * Property value: Integer + * Property value type: Integer * Unit: MB * Default value: 256 * Example @@ -302,7 +302,7 @@ this value indicates the output size of each task at the first stage, which dete The aggregation operation is executed in two stages. When an aggregation query is executed, this value indicates the amount of input data processed by each task at the second stage. As a result, it determines the degree of the parallel processing of the aggregation query. 
- * Property value: Integer + * Property value type: Integer * Unit: MB * Default value: 64 * Example @@ -310,7 +310,7 @@ As a result, it determines the degree of the parallel processing of the aggregat .. code-block:: xml - tajo.dist-query.groupby.partition-volume-mb + tajo.dist-query.groupby.task-volume-mb 64 @@ -322,7 +322,7 @@ This value provides the criterion to decide the algorithm to perform an aggregat If the input data is smaller than this value, the aggregation is performed with the in-memory hash aggregation. Otherwise, the sort-based aggregation is used. - * Property value: Integer + * Property value type: Integer * Unit: MB * Default value: 64 * Example @@ -343,9 +343,9 @@ Otherwise, the sort-based aggregation is used. `tajo.executor.aggregate.hash-table.size` """""""""""""""""""""""""""""""""""""""""" -The initial size of list for in-memory sort. +The initial size of hash table for in-memory aggregation. - * Property value: Integer + * Property value type: Integer * Default value: 10000 * Example @@ -366,7 +366,7 @@ Date/Time Settings Refer to :doc:`/time_zone`. - * Property value: Time zone id + * Property value type: Time zone id * Default value: Default time zone of JVM * Example @@ -383,7 +383,7 @@ Refer to :doc:`/time_zone`. Date order specification. - * Property value: One of YMD, DMY, MDY. + * Property value type: One of YMD, DMY, MDY. * Default value: YMD * Example @@ -404,7 +404,7 @@ Table partitions If this value is true, a partitioned table is overwritten even if a subquery leads to no result. Otherwise, the table data will be kept if there is no result. - * Property value: Boolean + * Property value type: Boolean * Default value: false * Example @@ -422,7 +422,7 @@ If this value is true, a partitioned table is overwritten even if a subquery lea In Tajo, storing a partition table is executed in two stages. 
This value indicates the output size of a task of the former stage, which determines the number of partitions to be shuffled between two stages. - * Property value: Integer + * Property value type: Integer * Unit: MB * Default value: 256 * Example @@ -444,7 +444,7 @@ Arithmetic Settings A flag to indicate how to handle the errors caused by invalid arithmetic operations. If true, a running query will be terminated with an overflow or a divide-by-zero. - * Property value: Boolean + * Property value type: Boolean * Default value: false * Example From d8577c7792f9685023eb672a2eee2995a5e6bbd8 Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Mon, 9 Nov 2015 14:28:27 +0900 Subject: [PATCH 3/5] Fix test failure --- .../resources/results/TestTajoCli/testHelpSessionVars.result | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tajo-core-tests/src/test/resources/results/TestTajoCli/testHelpSessionVars.result b/tajo-core-tests/src/test/resources/results/TestTajoCli/testHelpSessionVars.result index 4d4cc2ba33..7cccc2d10b 100644 --- a/tajo-core-tests/src/test/resources/results/TestTajoCli/testHelpSessionVars.result +++ b/tajo-core-tests/src/test/resources/results/TestTajoCli/testHelpSessionVars.result @@ -37,11 +37,11 @@ Available Session Variables: \set NULL_CHAR [text value] - null char of text file output \set CODEGEN [true or false] - Runtime code generation enabled (experiment) \set AGG_HASH_TABLE_SIZE [int value] - Aggregation hash table size -\set SORT_HASH_TABLE_SIZE [int value] - Sort hash table size +\set SORT_LIST_SIZE [int value] - List size for in-memory sort \set JOIN_HASH_TABLE_SIZE [int value] - Join hash table size \set INDEX_ENABLED [true or false] - index scan enabled \set INDEX_SELECTIVITY_THRESHOLD [real value] - the selectivity threshold for index scan -\set PARTITION_NO_RESULT_OVERWRITE_ENABLED [true or false] - If True, a partitioned table is overwritten even if a sub query leads to no result. 
Otherwise, the table data will be kept if there is no result +\set PARTITION_NO_RESULT_OVERWRITE_ENABLED [true or false] - If true, a partitioned table is overwritten even if a sub query leads to no result. Otherwise, the table data will be kept if there is no result \set ARITHABORT [true or false] - If true, a running query will be terminated when an overflow or divide-by-zero occurs. \set FETCH_ROWNUM [int value] - Sets the number of rows at a time from Master \set BLOCK_ON_RESULT [true or false] - Whether to block result set on query execution From 10a42945a0811e86612b17e8655f16bd00e80db5 Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Mon, 9 Nov 2015 14:49:44 +0900 Subject: [PATCH 4/5] trigger ci --- CHANGES | 1 + .../resources/results/TestTajoCli/testHelpSessionVars.result | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGES b/CHANGES index db7e2c7da0..7a472d6a5d 100644 --- a/CHANGES +++ b/CHANGES @@ -1,5 +1,6 @@ Tajo Change Log + Release 0.12.0 - unreleased NEW FEATURES diff --git a/tajo-core-tests/src/test/resources/results/TestTajoCli/testHelpSessionVars.result b/tajo-core-tests/src/test/resources/results/TestTajoCli/testHelpSessionVars.result index 7cccc2d10b..d65c346b4f 100644 --- a/tajo-core-tests/src/test/resources/results/TestTajoCli/testHelpSessionVars.result +++ b/tajo-core-tests/src/test/resources/results/TestTajoCli/testHelpSessionVars.result @@ -37,7 +37,7 @@ Available Session Variables: \set NULL_CHAR [text value] - null char of text file output \set CODEGEN [true or false] - Runtime code generation enabled (experiment) \set AGG_HASH_TABLE_SIZE [int value] - Aggregation hash table size -\set SORT_LIST_SIZE [int value] - List size for in-memory sort +\set SORT_LIST_SIZE [int value] - List size for in-memory sort \set JOIN_HASH_TABLE_SIZE [int value] - Join hash table size \set INDEX_ENABLED [true or false] - index scan enabled \set INDEX_SELECTIVITY_THRESHOLD [real value] - the selectivity threshold for index scan From 
daac664f5e390b83410cdef8bfb0207ca142be5d Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Mon, 9 Nov 2015 15:30:07 +0900 Subject: [PATCH 5/5] Fix test failure --- tajo-common/src/main/java/org/apache/tajo/SessionVars.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tajo-common/src/main/java/org/apache/tajo/SessionVars.java b/tajo-common/src/main/java/org/apache/tajo/SessionVars.java index 20fb793a43..46df6877fd 100644 --- a/tajo-common/src/main/java/org/apache/tajo/SessionVars.java +++ b/tajo-common/src/main/java/org/apache/tajo/SessionVars.java @@ -131,7 +131,7 @@ public enum SessionVars implements ConfigKey { NULL_CHAR(ConfVars.$TEXT_NULL, "null char of text file output", DEFAULT), CODEGEN(ConfVars.$CODEGEN, "Runtime code generation enabled (experiment)", DEFAULT), AGG_HASH_TABLE_SIZE(ConfVars.$AGG_HASH_TABLE_SIZE, "Aggregation hash table size", DEFAULT), - SORT_LIST_SIZE(ConfVars.$SORT_LIST_SIZE, "List size for in-memory sort ", DEFAULT), + SORT_LIST_SIZE(ConfVars.$SORT_LIST_SIZE, "List size for in-memory sort", DEFAULT), JOIN_HASH_TABLE_SIZE(ConfVars.$JOIN_HASH_TABLE_SIZE, "Join hash table size", DEFAULT), // for index