From 3ca5b9fdcd56a70aaa32df1cedfe689fc48559a3 Mon Sep 17 00:00:00 2001 From: Jongyoung Park Date: Fri, 20 Nov 2015 17:05:22 +0900 Subject: [PATCH 1/6] use 'data format' instead of 'file format' --- .../main/java/org/apache/tajo/conf/TajoConf.java | 2 +- .../engine/planner/physical/PhysicalPlanUtil.java | 6 +++--- tajo-docs/src/main/sphinx/table_management.rst | 8 ++++---- .../main/sphinx/table_management/data_formats.rst | 15 +++++++++++++++ .../main/sphinx/table_management/file_formats.rst | 15 --------------- .../sphinx/table_management/table_overview.rst | 2 +- .../main/sphinx/table_management/tablespaces.rst | 4 ++-- .../org/apache/tajo/storage/StorageProperty.java | 6 +++--- 8 files changed, 29 insertions(+), 29 deletions(-) create mode 100644 tajo-docs/src/main/sphinx/table_management/data_formats.rst delete mode 100644 tajo-docs/src/main/sphinx/table_management/file_formats.rst diff --git a/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java b/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java index a2c1fb820e..9b9a743bf5 100644 --- a/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java +++ b/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java @@ -368,7 +368,7 @@ public static enum ConfVars implements ConfigKey { $TIMEZONE("tajo.timezone", TimeZone.getDefault().getID()), $DATE_ORDER("tajo.datetime.date-order", "YMD"), - // FILE FORMAT + // DATA FORMAT $TEXT_NULL("tajo.text.null", "\\\\N"), // Only for Debug and Testing diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/PhysicalPlanUtil.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/PhysicalPlanUtil.java index 8d63410308..04aa310a19 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/PhysicalPlanUtil.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/PhysicalPlanUtil.java @@ -203,7 +203,7 @@ public PhysicalExec visit(PhysicalExec exec, Stack stack, Class Date: Fri, 20 Nov 2015 
17:05:51 +0900 Subject: [PATCH 2/6] compression document is written roughly --- .../sphinx/table_management/compression.rst | 24 ++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/tajo-docs/src/main/sphinx/table_management/compression.rst b/tajo-docs/src/main/sphinx/table_management/compression.rst index 3d03ba8c9c..f635b50658 100644 --- a/tajo-docs/src/main/sphinx/table_management/compression.rst +++ b/tajo-docs/src/main/sphinx/table_management/compression.rst @@ -1,5 +1,23 @@ -********************************* +*********** Compression -********************************* +*********** -.. todo:: \ No newline at end of file +Using compression makes data size compact and network traffic low. Most of Tajo data types support data compression feature. +Currently, compression configuration affects only for stored data format and it is specified when a table is created as table meta information. +Compression for intermediate data or others is not supported now. + +=========================================== +Compression Properties for each Data Format +=========================================== + + .. csv-table:: Compression Properties and Codec Class + + **Data Format**,**Property Name**,**Available Values** + text/json/rcfile/sequencefile [#f1]_,compression.codec,Fully Qualified Classname in Hadoop [#f2]_ + parquet,parquet.compression,uncompressed/snappy/gzip/lzo + orc,orc.compression.kind,none/snappy/zlib + +.. rubric:: Footnotes + +.. [#f1] For sequence file, you should specify 'compression.type' in addition to 'compression.codec'. Refer to :doc:`/table_management/sequencefile`. +.. [#f2] All classes are available if they implement `org.apache.hadoop.io.compress.CompressionCodec `_. 
From 0f5cdd82cac7691f325fe3d14ec2bdf48ca665ae Mon Sep 17 00:00:00 2001 From: Jongyoung Park Date: Fri, 20 Nov 2015 17:11:50 +0900 Subject: [PATCH 3/6] some comment modified --- tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java b/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java index 9b9a743bf5..9f788ebb8a 100644 --- a/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java +++ b/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java @@ -368,7 +368,7 @@ public static enum ConfVars implements ConfigKey { $TIMEZONE("tajo.timezone", TimeZone.getDefault().getID()), $DATE_ORDER("tajo.datetime.date-order", "YMD"), - // DATA FORMAT + // null character for text file output $TEXT_NULL("tajo.text.null", "\\\\N"), // Only for Debug and Testing From 051f0b29e5d5dad720a05b17a948735477daec90 Mon Sep 17 00:00:00 2001 From: Jongyoung Park Date: Tue, 24 Nov 2015 11:17:30 +0900 Subject: [PATCH 4/6] Update more --- .../src/main/sphinx/table_management/compression.rst | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tajo-docs/src/main/sphinx/table_management/compression.rst b/tajo-docs/src/main/sphinx/table_management/compression.rst index f635b50658..67dd2559d1 100644 --- a/tajo-docs/src/main/sphinx/table_management/compression.rst +++ b/tajo-docs/src/main/sphinx/table_management/compression.rst @@ -2,20 +2,20 @@ Compression *********** -Using compression makes data size compact and network traffic low. Most of Tajo data types support data compression feature. -Currently, compression configuration affects only for stored data format and it is specified when a table is created as table meta information. +Using compression can make data size compact, thereby enabling efficient use of network bandwidth and storage. Most Tajo data formats support data compression. 
+Currently, compression configuration affects only the stored data format, and it is specified as table meta information when a table is created (see `Create Table <../sql_language/ddl.html#create-table>`_). Compression for intermediate data or others is not supported now. =========================================== Compression Properties for each Data Format =========================================== - .. csv-table:: Compression Properties and Codec Class + .. csv-table:: Compression Properties **Data Format**,**Property Name**,**Available Values** - text/json/rcfile/sequencefile [#f1]_,compression.codec,Fully Qualified Classname in Hadoop [#f2]_ - parquet,parquet.compression,uncompressed/snappy/gzip/lzo - orc,orc.compression.kind,none/snappy/zlib + :doc:`text`/:doc:`json`/:doc:`rcfile`/:doc:`sequencefile` [#f1]_,compression.codec,Fully Qualified Classname in Hadoop [#f2]_ + :doc:`parquet`,parquet.compression,uncompressed/snappy/gzip/lzo + :doc:`orc`,orc.compression.kind,none/snappy/zlib .. rubric:: Footnotes From 0ba6383a6d86a94ad5ce83542cb9ff4552a842e1 Mon Sep 17 00:00:00 2001 From: Jongyoung Park Date: Tue, 24 Nov 2015 11:41:14 +0900 Subject: [PATCH 5/6] Add a link --- tajo-docs/src/main/sphinx/table_management/table_overview.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tajo-docs/src/main/sphinx/table_management/table_overview.rst b/tajo-docs/src/main/sphinx/table_management/table_overview.rst index c0ec43bce8..4f4f764d7f 100644 --- a/tajo-docs/src/main/sphinx/table_management/table_overview.rst +++ b/tajo-docs/src/main/sphinx/table_management/table_overview.rst @@ -75,7 +75,8 @@ There are some common table properties which are used in most tables. Compression ----------- -.. todo:: + +See :doc:`compression`. 
Time zone --------- From b6f77d1b844608ccf67bdccf62d50eab9d55f018 Mon Sep 17 00:00:00 2001 From: Jongyoung Park Date: Tue, 24 Nov 2015 11:57:50 +0900 Subject: [PATCH 6/6] trivial --- .../main/sphinx/table_management/table_overview.rst | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/tajo-docs/src/main/sphinx/table_management/table_overview.rst b/tajo-docs/src/main/sphinx/table_management/table_overview.rst index 4f4f764d7f..7ecf53b6c0 100644 --- a/tajo-docs/src/main/sphinx/table_management/table_overview.rst +++ b/tajo-docs/src/main/sphinx/table_management/table_overview.rst @@ -1,10 +1,12 @@ -************************************* +*********************** Overview of Tajo Tables -************************************* +*********************** +======== Overview ======== +=========== Tablespaces =========== @@ -12,8 +14,9 @@ Tablespaces is a physical location where files or data objects representing data Please refer to :doc:`/table_management/tablespaces` if you want to know more information about tablespaces. +============= Managed Table -================ +============= ``CREATE TABLE`` statement lets you create a table located in the warehouse directory specified by the configuration property ``tajo.warehouse.directory`` or ``${tajo.root}/warehouse`` by default. For example: @@ -26,8 +29,9 @@ Managed Table ); +============== External Table -================ +============== ``CREATE EXTERNAL TABLE`` statement lets you create a table located in a specify location so that Tajo does not use a default data warehouse location for the table. External tables are in common used if you already have data generated. LOCATION clause must be required for an external table.