From fcc9e31f798e7949f8a082f5e532b2b51a771243 Mon Sep 17 00:00:00 2001 From: Yancheng Luo Date: Tue, 7 Jun 2016 17:40:52 +0800 Subject: [PATCH 1/2] HAWQ-784. Refine the document of 'hawq register' to give user information about data types in HIVE and HAWQ. --- tools/doc/hawqregister_help | 53 ++++++++++++++++--------------------- 1 file changed, 23 insertions(+), 30 deletions(-) diff --git a/tools/doc/hawqregister_help b/tools/doc/hawqregister_help index ade1e3a665..08092765a2 100644 --- a/tools/doc/hawqregister_help +++ b/tools/doc/hawqregister_help @@ -25,7 +25,7 @@ and then update the table meta data to include the files. To use "hawq register", HAWQ must have been started. -Currently "hawq register" supports parquet tables only. +Currently "hawq register" only supports parquet tables generated by HIVE and HAWQ. User have to make sure that the meta data of the parquet file(s) and the table are consistent. The table to be registered into should not be hash distributed, which @@ -89,7 +89,7 @@ EXAMPLES ***************************************************** Run "hawq register" to register a parquet file in HDFS with path -'hdfs://localhost:8020/temp/hive.paq' generated by hive into table +'hdfs://localhost:8020/temp/hive.paq' generated by HIVE into table 'parquet_table' in HAWQ, which is in the database named 'postgres'. Assume the location of the database is 'hdfs://localhost:8020/hawq_default', @@ -106,33 +106,26 @@ table 'pg_aoseg.pg_paqseg_77160'. ***************************************************** DATA TYPES ***************************************************** -The data types used in HAWQ and parquet format are not the same, so there is a + +Currently "hawq register" only supports registering parquet file(s) generated by HIVE +or HAWQ into HAWQ. To do so, make sure to follow the mapping rules listed below. 
+ +The data types used in HIVE and HAWQ are not the same, so there is a mapping between them, concluded as follow: -Data types in HAWQ Data types in parquet -bool boolean -int2 int32 -int4 int32 -date int32 -int8 int64 -time int64 -timestamptz int64 -timestamp int64 -money int64 -float4 float -float8 double -bit byte_array -varbit byte_array -byte byte_array -numeric byte_array -name byte_array -char byte_array -bpchar byte_array -varchar byte_array -text byte_array -xml byte_array -timetz byte_array -interval byte_array -macaddr byte_array -inet byte_array -cidr byte_array +Data types in HIVE Data types in HAWQ +boolean bool +tinyint int2 +smallint int2 / smallint +int int4 / int +bigint int8 / bigint +date date +float float4 +double float8 / double precision +string varchar +binary bytea +char char +varchar varchar + +Limitations: Currently the conversion of the following data types in HIVE is not supported: +timestamp, decimal, array, struct, map, union From 7f976d8fbf7e95f24d1eb8eb8d95453ee19af149 Mon Sep 17 00:00:00 2001 From: Yancheng Luo Date: Tue, 7 Jun 2016 18:09:44 +0800 Subject: [PATCH 2/2] HAWQ-784. Add tests to hawq register for data types and NULL check. 
--- .../ManagementTool/test_hawq_register.cpp | 39 ++++++++++++++++++ .../test_hawq_register_data_types.paq | Bin 0 -> 1232 bytes .../test_hawq_register_null.paq | Bin 0 -> 926 bytes 3 files changed, 39 insertions(+) create mode 100644 src/test/feature/ManagementTool/test_hawq_register_data_types.paq create mode 100644 src/test/feature/ManagementTool/test_hawq_register_null.paq diff --git a/src/test/feature/ManagementTool/test_hawq_register.cpp b/src/test/feature/ManagementTool/test_hawq_register.cpp index 2efbd84a87..c802642f58 100644 --- a/src/test/feature/ManagementTool/test_hawq_register.cpp +++ b/src/test/feature/ManagementTool/test_hawq_register.cpp @@ -54,6 +54,45 @@ TEST_F(TestHawqRegister, TestSingleHiveFile) { util.execute("drop table hawqregister;"); } +TEST_F(TestHawqRegister, TestDataTypes) { + SQLUtility util; + string rootPath(util.getTestRootPath()); + /* This parquet file is generated by HIVE, using the table created by */ + /* 'create table parquet(a boolean, b tinyint, c smallint, d int, e bigint, f date, g float, h double, i string, j binary, k char(10), l varchar(10)) stored as parquet;' */ + string relativePath("/ManagementTool/test_hawq_register_data_types.paq"); + string filePath = rootPath + relativePath; + + EXPECT_EQ(0, Command::getCommandStatus("hadoop fs -put " + filePath + " hdfs://localhost:8020/hawq_register_data_types.paq")); + + util.execute("create table hawqregister(a bool, b int2, c int2, d int4, e int8, f date, g float4, h float8, i varchar, j bytea, k char, l varchar) with (appendonly=true, orientation=parquet);"); + util.query("select * from hawqregister;", 0); + + EXPECT_EQ(0, Command::getCommandStatus("hawq register postgres hawqregister hdfs://localhost:8020/hawq_register_data_types.paq")); + + util.query("select * from hawqregister;", 1); + util.execute("drop table hawqregister;"); +} + +TEST_F(TestHawqRegister, TestAllNULL) { + SQLUtility util; + string rootPath(util.getTestRootPath()); + /* This parquet file is generated 
by HIVE, using the table created by */ + /* 'create table parquet(a boolean, b tinyint, c smallint, d int, e bigint, f date, g float, h double, i string, j binary, k char(10), l varchar(10)) stored as parquet;' */ + /* with all the values set to NULL */ + string relativePath("/ManagementTool/test_hawq_register_null.paq"); + string filePath = rootPath + relativePath; + + EXPECT_EQ(0, Command::getCommandStatus("hadoop fs -put " + filePath + " hdfs://localhost:8020/hawq_register_data_types.paq")); + + util.execute("create table hawqregister(a bool, b int2, c int2, d int4, e int8, f date, g float4, h float8, i varchar, j bytea, k char, l varchar) with (appendonly=true, orientation=parquet);"); + util.query("select * from hawqregister;", 0); + + EXPECT_EQ(0, Command::getCommandStatus("hawq register postgres hawqregister hdfs://localhost:8020/hawq_register_data_types.paq")); + + util.query("select * from hawqregister;", 1); + util.execute("drop table hawqregister;"); +} + TEST_F(TestHawqRegister, TestFiles) { SQLUtility util; string rootPath(util.getTestRootPath()); diff --git a/src/test/feature/ManagementTool/test_hawq_register_data_types.paq b/src/test/feature/ManagementTool/test_hawq_register_data_types.paq new file mode 100644 index 0000000000000000000000000000000000000000..88c3c6bacf58f83d9f104b71f4c65385d467cdb8 GIT binary patch literal 1232 zcmZvcy-piJ5Xbk~-G&Jp{cfNUyWHV-LYz73_i8e*E%ozPL=jok#QJEC7MW(lLb*Ffhr6nT;|V zGq5P0Wb?GsVqjC`n86m=Ii5J#MF(G+@=h{)T11xUakK5T@1*SjYkz9ktc^thqx^s$ zNZZ#WD5nlKnlYhbj`)$B`mgfSL@2Ke4UG!)%0RZFLiwVzalyV2Talr>HyyC@781p- z_zJ^*slL)v=Pdu!v|dt`Gvq@I`$~lJ8Q1N6&*P&0a}DcPlqhd42exu{Uev#}us&&x z)K+@0^>6A9wDO#V%9--1ys6{a7CMx_8ya<@s2QJO*e4AZI+PDOcUo`{iYsL(ubbL_ z%YwUZ(tf21`QIit+3wdL!TRS#^6TT-+PzHH=Zo(v*W26Q^W5F=VzG?fK@bLmI7mie l*o_BC97gGA-0KI4*YA(h-hLYQ(@`3w@7W!GB!~E4_78UFfOh}@ literal 0 HcmV?d00001 diff --git a/src/test/feature/ManagementTool/test_hawq_register_null.paq b/src/test/feature/ManagementTool/test_hawq_register_null.paq new file mode 
100644 index 0000000000000000000000000000000000000000..44a8915c7483003d9908b92930de8932bab82b28 GIT binary patch literal 926 zcmeIxNp8Y06b9gjNl`?zbws2rvH?-4MGc`0QiarAyQxqwP}#9f!q9=~0IYffPQZ#2 za0pI-y5Izyp-yF6S@r;3ME}eC^lZtN2M;eH2JB$x8Y5Yhu z%OqX#*Lj@qDZv$acvVy>5Wpx)sxnr|09#p~tA9~-0NYt-2)LYOBJU;3v?*Tie=YryX~5N|9hRazK@YsYP7F(k<-}dLg}uMV`g|)P=1nb z>BYJ6llBeD%?sljq~;IOZ9P9Ue$c)y`naq;F}^OU`Mq>Udun{I{hara@w4`=m754VHZmwa(EodqjCo2Tz@LD=Xv!k|u<>4XI>LP?7e-l6oC gwK$`(==7Qq;b9c@M6)YcBswA%SMV%f-$1_1A7MRe1poj5 literal 0 HcmV?d00001