Permalink
Browse files

Nested Types: Pretty print complex types in DESCRIBE.

The current DESCRIBE prints the column type as a single string without
whitespace. As a result, the DESCRIBE output for tables with complex types
is basically unreadable/unusable, e.g., from the Impala shell.

This patch adds a prettyPrint() function to the FE Type and uses that
for generating a nicely formatted DESCRIBE output.

The output of DESCRIBE FORMATTED is intentionally not modified because
exact Hive-compatibility has been and presumably continues to be very
important to our users.

Change-Id: Ida810facdffd970948b837b83a60f9ddcd95f44d
Reviewed-on: http://gerrit.cloudera.org:8080/633
Reviewed-by: Alex Behm <alex.behm@cloudera.com>
Tested-by: Internal Jenkins
  • Loading branch information...
Alex Behm Internal Jenkins
Alex Behm authored and Internal Jenkins committed Jul 30, 2015
1 parent 82ad449 commit 34ae7ade6bbdf7cf712d64b599a57d500832e9d4
@@ -1,5 +1,7 @@
package com.cloudera.impala.catalog;
import org.apache.commons.lang3.StringUtils;
import com.cloudera.impala.thrift.TColumnType;
import com.cloudera.impala.thrift.TTypeNode;
import com.cloudera.impala.thrift.TTypeNodeType;
@@ -18,9 +20,7 @@ public ArrayType(Type itemType) {
/** Returns the type of the items stored in this array. */
public Type getItemType() {
  return itemType_;
}
@Override
public String toSql() {
return String.format("ARRAY<%s>", itemType_.toSql());
}
public String toSql() { return String.format("ARRAY<%s>", itemType_.toSql()); }
@Override
public boolean equals(Object other) {
@@ -37,5 +37,15 @@ public void toThrift(TColumnType container) {
node.setType(TTypeNodeType.ARRAY);
itemType_.toThrift(container);
}
}
/**
 * Pretty prints this array type with lpad leading spaces.
 *
 * The item type is always rendered through its own prettyPrint(lpad), so a struct
 * nested below any number of collection levels (e.g. ARRAY<ARRAY<STRUCT<...>>>) is
 * expanded onto multiple lines, not only a struct that is the direct item type.
 * For an item type whose prettyPrint(lpad) is just the padding followed by its
 * toSql() string, the result is the same single-line string as leftPadding + toSql().
 */
@Override
protected String prettyPrint(int lpad) {
  String leftPadding = StringUtils.repeat(' ', lpad);
  // Pass in the padding to make sure nested fields are aligned properly,
  // even if we then strip the top-level padding: the item type follows "ARRAY<"
  // on the same line, so its own leading spaces must not be repeated.
  String itemStr = itemType_.prettyPrint(lpad).substring(lpad);
  return String.format("%sARRAY<%s>", leftPadding, itemStr);
}
}
@@ -1,5 +1,7 @@
package com.cloudera.impala.catalog;
import org.apache.commons.lang3.StringUtils;
import com.cloudera.impala.thrift.TColumnType;
import com.cloudera.impala.thrift.TTypeNode;
import com.cloudera.impala.thrift.TTypeNodeType;
@@ -27,6 +29,17 @@ public String toSql() {
return String.format("MAP<%s,%s>", keyType_.toSql(), valueType_.toSql());
}
/**
 * Pretty prints this map type with lpad leading spaces.
 *
 * The key type is printed with toSql(). The value type is always rendered through
 * its own prettyPrint(lpad), so a struct nested below further collection levels
 * (e.g. MAP<STRING,ARRAY<STRUCT<...>>>) is expanded onto multiple lines, not only a
 * struct that is the direct value type. For a value type whose prettyPrint(lpad) is
 * just the padding followed by its toSql() string, the result is the same
 * single-line string as leftPadding + toSql().
 */
@Override
protected String prettyPrint(int lpad) {
  String leftPadding = StringUtils.repeat(' ', lpad);
  // Pass in the padding to make sure nested fields are aligned properly,
  // even if we then strip the top-level padding: the value type follows the key
  // on the same line, so its own leading spaces must not be repeated.
  String valueStr = valueType_.prettyPrint(lpad).substring(lpad);
  return String.format("%sMAP<%s,%s>", leftPadding, keyType_.toSql(), valueStr);
}
@Override
public void toThrift(TColumnType container) {
TTypeNode node = new TTypeNode();
@@ -14,6 +14,8 @@
package com.cloudera.impala.catalog;
import org.apache.commons.lang3.StringUtils;
import com.cloudera.impala.analysis.TypesUtil;
import com.cloudera.impala.thrift.TColumnType;
import com.cloudera.impala.thrift.TScalarType;
@@ -160,6 +162,11 @@ public String toSql() {
}
}
/**
 * Pretty prints this type as lpad spaces followed by its toSql() string.
 */
@Override
protected String prettyPrint(int lpad) {
  String indent = StringUtils.repeat(' ', lpad);
  return indent + toSql();
}
@Override
public void toThrift(TColumnType container) {
TTypeNode node = new TTypeNode();
@@ -14,6 +14,8 @@
package com.cloudera.impala.catalog;
import org.apache.commons.lang3.StringUtils;
import com.cloudera.impala.thrift.TColumnType;
import com.cloudera.impala.thrift.TStructField;
import com.cloudera.impala.thrift.TTypeNode;
@@ -51,6 +53,24 @@ public String toSql() {
return sb.toString();
}
/**
 * Renders this field as its name indented by lpad spaces. If the field has a type,
 * ":" and the type's prettyPrint(lpad) output (minus its leading padding) follow the
 * name. If the field has a comment, " COMMENT '<comment>'" is appended at the end.
 */
public String prettyPrint(int lpad) {
  StringBuilder out = new StringBuilder();
  out.append(StringUtils.repeat(' ', lpad)).append(name_);
  if (type_ != null) {
    // The type is printed with this field's padding so that its nested lines are
    // aligned relative to the field. Its first lpad characters are dropped because
    // the type continues on the same line as the field name.
    String renderedType = type_.prettyPrint(lpad).substring(lpad);
    out.append(":").append(renderedType);
  }
  if (comment_ != null) {
    out.append(String.format(" COMMENT '%s'", comment_));
  }
  return out.toString();
}
public void toThrift(TColumnType container, TTypeNode node) {
TStructField field = new TStructField();
field.setName(name_);
@@ -3,6 +3,8 @@
import java.util.ArrayList;
import java.util.HashMap;
import org.apache.commons.lang3.StringUtils;
import com.cloudera.impala.thrift.TColumnType;
import com.cloudera.impala.thrift.TStructField;
import com.cloudera.impala.thrift.TTypeNode;
@@ -35,12 +37,19 @@ public StructType() {
@Override
public String toSql() {
ArrayList<String> fieldsSql = Lists.newArrayList();
for (StructField f: fields_) {
fieldsSql.add(f.toSql());
}
for (StructField f: fields_) fieldsSql.add(f.toSql());
return String.format("STRUCT<%s>", Joiner.on(",").join(fieldsSql));
}
/**
 * Pretty prints this struct with lpad leading spaces: "STRUCT<" on the first line,
 * then one field per line (each printed with lpad + 2 spaces and separated by
 * ","), and finally ">" on its own line, indented by lpad spaces.
 */
@Override
protected String prettyPrint(int lpad) {
  String indent = StringUtils.repeat(' ', lpad);
  StringBuilder out = new StringBuilder(indent).append("STRUCT<\n");
  // The separator is empty before the first field and ",\n" before every other one.
  String separator = "";
  for (StructField field: fields_) {
    out.append(separator).append(field.prettyPrint(lpad + 2));
    separator = ",\n";
  }
  out.append("\n").append(indent).append(">");
  return out.toString();
}
public void addField(StructField field) {
field.setPosition(fields_.size());
fields_.add(field);
@@ -111,11 +111,20 @@
/**
* The output of this is stored directly in the hive metastore as the column type.
* The string must match exactly.
* TODO: Consider adding another 'pretty print' method for complex types to use
* in describe/show statements.
*/
public abstract String toSql();
/**
 * Returns a more readable variant of toSql() in which nested types are spread over
 * several lines and indented with spaces. Printing starts with no indentation.
 */
public String prettyPrint() {
  final int noIndent = 0;
  return prettyPrint(noIndent);
}
/**
 * Pretty prints this type with lpad number of leading spaces. Used to implement
 * prettyPrint() with space-indented nested types.
 *
 * Callers that embed the result in a larger line strip its first lpad characters
 * with substring(lpad), so the returned string is expected to start with exactly
 * lpad characters of padding.
 *
 * @param lpad number of spaces of indentation to print before the type
 * @return the formatted, indented type string
 */
protected abstract String prettyPrint(int lpad);
/** Returns true if this type is the scalar type PrimitiveType.INVALID_TYPE. */
public boolean isInvalid() {
  return isScalarType(PrimitiveType.INVALID_TYPE);
}
/** Returns true if isInvalid() is false for this type. */
public boolean isValid() {
  return !isInvalid();
}
/** Returns true if this type is the scalar type PrimitiveType.NULL_TYPE. */
public boolean isNull() {
  return isScalarType(PrimitiveType.NULL_TYPE);
}
@@ -56,7 +56,7 @@ private static TDescribeTableResult describeTableMinimal(Table table) {
TColumnValue colNameCol = new TColumnValue();
colNameCol.setString_val(column.getName());
TColumnValue dataTypeCol = new TColumnValue();
dataTypeCol.setString_val(column.getType().toSql().toLowerCase());
dataTypeCol.setString_val(column.getType().prettyPrint().toLowerCase());
TColumnValue commentCol = new TColumnValue();
commentCol.setString_val(column.getComment() != null ? column.getComment() : "");
descResult.results.add(
@@ -20,7 +20,6 @@
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
@@ -42,8 +41,8 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.cloudera.impala.analysis.Analyzer;
import com.cloudera.impala.analysis.AnalysisContext;
import com.cloudera.impala.analysis.Analyzer;
import com.cloudera.impala.analysis.CreateDataSrcStmt;
import com.cloudera.impala.analysis.CreateDropRoleStmt;
import com.cloudera.impala.analysis.CreateUdaStmt;
@@ -161,7 +161,7 @@ describe t2
'c3','int','added a comment'
'x','array<int>',''
'y','map<string,float>','Map Col'
'z','struct<f1:boolean,f2:bigint>',''
'z','struct<\n f1:boolean,\n f2:bigint\n>',''
---- TYPES
string,string,string
====
@@ -177,7 +177,7 @@ c double)
describe t2
---- RESULTS
'a','int','Int Col'
'b','struct<f1:array<int>,f2:map<string,struct<f1:bigint>>>',''
'b','struct<\n f1:array<int>,\n f2:map<string,struct<\n f1:bigint\n >>\n>',''
'c','double',''
---- TYPES
string,string,string
@@ -750,4 +750,4 @@ show tables in alter_table_test_db2 like '%mv%'
---- RESULTS
---- TYPES
STRING
====
====
@@ -12,13 +12,13 @@ create table ddl_test_db.temp_legacy_table like parquet
describe ddl_test_db.temp_legacy_table
---- RESULTS
't_long','bigint','inferred from: required int64 t_long'
't_struct','struct<f_int:int,t_struct:struct<f_int:int,f_int2:int>,int_arr:array<int>,int_map:map<string,int>>','inferred from: required group t_struct {\n required int32 f_int;\n required group t_struct {\n required int32 f_int;\n optional int32 f_int2;\n }\n required group int_arr (LIST) {\n repeated int32 array;\n }\n required group int_map (MAP) {\n repeated group map (MAP_KEY_VALUE) {\n required binary key (UTF8);\n required int32 value;\n }\n }\n}'
't_struct','struct<\n f_int:int,\n t_struct:struct<\n f_int:int,\n f_int2:int\n >,\n int_arr:array<int>,\n int_map:map<string,int>\n>','inferred from: required group t_struct {\n required int32 f_int;\n required group t_struct {\n required int32 f_int;\n optional int32 f_int2;\n }\n required group int_arr (LIST) {\n repeated int32 array;\n }\n required group int_map (MAP) {\n repeated group map (MAP_KEY_VALUE) {\n required binary key (UTF8);\n required int32 value;\n }\n }\n}'
't_array_basic','array<int>','inferred from: required group t_array_basic (LIST) {\n repeated int32 array;\n}'
't_array_struct','array<struct<f_int1:int,f_int2:int,f_int3:int>>','inferred from: required group t_array_struct (LIST) {\n repeated group array {\n required int32 f_int1;\n required int32 f_int2;\n optional int32 f_int3;\n }\n}'
't_array_struct','array<struct<\n f_int1:int,\n f_int2:int,\n f_int3:int\n>>','inferred from: required group t_array_struct (LIST) {\n repeated group array {\n required int32 f_int1;\n required int32 f_int2;\n optional int32 f_int3;\n }\n}'
't_array_array','array<array<int>>','inferred from: required group t_array_array (LIST) {\n repeated group array (LIST) {\n repeated int32 array;\n }\n}'
't_array_map','array<map<string,int>>','inferred from: required group t_array_map (LIST) {\n repeated group array (MAP) {\n repeated group map (MAP_KEY_VALUE) {\n required binary key (UTF8);\n required int32 value;\n }\n }\n}'
'map_int','map<string,int>','inferred from: required group map_int (MAP) {\n repeated group map (MAP_KEY_VALUE) {\n required binary key (UTF8);\n required int32 value;\n }\n}'
'map_struct','map<string,struct<f_int:int,f_int2:int>>','inferred from: required group map_struct (MAP) {\n repeated group map (MAP_KEY_VALUE) {\n required binary key (UTF8);\n required group value {\n required int32 f_int;\n required int32 f_int2;\n }\n }\n}'
'map_struct','map<string,struct<\n f_int:int,\n f_int2:int\n>>','inferred from: required group map_struct (MAP) {\n repeated group map (MAP_KEY_VALUE) {\n required binary key (UTF8);\n required group value {\n required int32 f_int;\n required int32 f_int2;\n }\n }\n}'
'map_array','map<string,array<int>>','inferred from: required group map_array (MAP) {\n repeated group map (MAP_KEY_VALUE) {\n required binary key (UTF8);\n required group value (LIST) {\n repeated int32 array;\n }\n }\n}'
'map_map','map<string,map<string,int>>','inferred from: required group map_map (MAP) {\n repeated group map (MAP_KEY_VALUE) {\n required binary key (UTF8);\n required group value (MAP) {\n repeated group map (MAP_KEY_VALUE) {\n required binary key (UTF8);\n optional int32 value;\n }\n }\n }\n}'
---- TYPES
@@ -41,13 +41,13 @@ create table ddl_test_db.temp_modern_table like parquet
describe ddl_test_db.temp_modern_table
---- RESULTS
't_long','bigint','inferred from: required int64 t_long'
't_struct','struct<f_int:int,t_struct:struct<f_int:int,f_int2:int>,int_arr:array<int>,int_map:map<string,int>>','inferred from: required group t_struct {\n required int32 f_int;\n required group t_struct {\n required int32 f_int;\n optional int32 f_int2;\n }\n required group int_arr (LIST) {\n repeated group list {\n required int32 element;\n }\n }\n required group int_map (MAP) {\n repeated group map (MAP_KEY_VALUE) {\n required binary key (UTF8);\n required int32 value;\n }\n }\n}'
't_struct','struct<\n f_int:int,\n t_struct:struct<\n f_int:int,\n f_int2:int\n >,\n int_arr:array<int>,\n int_map:map<string,int>\n>','inferred from: required group t_struct {\n required int32 f_int;\n required group t_struct {\n required int32 f_int;\n optional int32 f_int2;\n }\n required group int_arr (LIST) {\n repeated group list {\n required int32 element;\n }\n }\n required group int_map (MAP) {\n repeated group map (MAP_KEY_VALUE) {\n required binary key (UTF8);\n required int32 value;\n }\n }\n}'
't_array_basic','array<int>','inferred from: required group t_array_basic (LIST) {\n repeated group list {\n required int32 element;\n }\n}'
't_array_struct','array<struct<f_int1:int,f_int2:int,f_int3:int>>','inferred from: required group t_array_struct (LIST) {\n repeated group list {\n required group element {\n required int32 f_int1;\n required int32 f_int2;\n optional int32 f_int3;\n }\n }\n}'
't_array_struct','array<struct<\n f_int1:int,\n f_int2:int,\n f_int3:int\n>>','inferred from: required group t_array_struct (LIST) {\n repeated group list {\n required group element {\n required int32 f_int1;\n required int32 f_int2;\n optional int32 f_int3;\n }\n }\n}'
't_array_array','array<array<int>>','inferred from: required group t_array_array (LIST) {\n repeated group list {\n required group element (LIST) {\n repeated group list {\n required int32 element;\n }\n }\n }\n}'
't_array_map','array<map<string,int>>','inferred from: required group t_array_map (LIST) {\n repeated group list {\n required group element (MAP) {\n repeated group map (MAP_KEY_VALUE) {\n required binary key (UTF8);\n required int32 value;\n }\n }\n }\n}'
'map_int','map<string,int>','inferred from: required group map_int (MAP) {\n repeated group map (MAP_KEY_VALUE) {\n required binary key (UTF8);\n required int32 value;\n }\n}'
'map_struct','map<string,struct<f_int:int,f_int2:int>>','inferred from: required group map_struct (MAP) {\n repeated group map (MAP_KEY_VALUE) {\n required binary key (UTF8);\n required group value {\n required int32 f_int;\n required int32 f_int2;\n }\n }\n}'
'map_struct','map<string,struct<\n f_int:int,\n f_int2:int\n>>','inferred from: required group map_struct (MAP) {\n repeated group map (MAP_KEY_VALUE) {\n required binary key (UTF8);\n required group value {\n required int32 f_int;\n required int32 f_int2;\n }\n }\n}'
'map_array','map<string,array<int>>','inferred from: required group map_array (MAP) {\n repeated group map (MAP_KEY_VALUE) {\n required binary key (UTF8);\n required group value (LIST) {\n repeated group list {\n optional int32 element;\n }\n }\n }\n}'
'map_map','map<string,map<string,int>>','inferred from: required group map_map (MAP) {\n repeated group map (MAP_KEY_VALUE) {\n required binary key (UTF8);\n required group value (MAP) {\n repeated group map (MAP_KEY_VALUE) {\n required binary key (UTF8);\n optional int32 value;\n }\n }\n }\n}'
---- TYPES
@@ -606,15 +606,15 @@ describe allcomplextypes_clone
'int_array_col','array<int>',''
'array_array_col','array<array<int>>',''
'map_array_col','array<map<string,int>>',''
'struct_array_col','array<struct<f1:bigint,f2:string>>',''
'struct_array_col','array<struct<\n f1:bigint,\n f2:string\n>>',''
'int_map_col','map<string,int>',''
'array_map_col','map<string,array<int>>',''
'map_map_col','map<string,map<string,int>>',''
'struct_map_col','map<string,struct<f1:bigint,f2:string>>',''
'int_struct_col','struct<f1:int,f2:int>',''
'complex_struct_col','struct<f1:int,f2:array<int>,f3:map<string,int>>',''
'nested_struct_col','struct<f1:int,f2:struct<f11:bigint,f12:struct<f21:bigint>>>',''
'complex_nested_struct_col','struct<f1:int,f2:array<struct<f11:bigint,f12:map<string,struct<f21:bigint>>>>>',''
'struct_map_col','map<string,struct<\n f1:bigint,\n f2:string\n>>',''
'int_struct_col','struct<\n f1:int,\n f2:int\n>',''
'complex_struct_col','struct<\n f1:int,\n f2:array<int>,\n f3:map<string,int>\n>',''
'nested_struct_col','struct<\n f1:int,\n f2:struct<\n f11:bigint,\n f12:struct<\n f21:bigint\n >\n >\n>',''
'complex_nested_struct_col','struct<\n f1:int,\n f2:array<struct<\n f11:bigint,\n f12:map<string,struct<\n f21:bigint\n >>\n >>\n>',''
'year','int',''
'month','int',''
---- TYPES
@@ -798,4 +798,4 @@ drop database test_drop_cascade_db cascade
---- QUERY
show databases like 'test_drop_cascade_db'
---- RESULTS
====
====
@@ -2,8 +2,6 @@
---- QUERY
# Simple describe (note Hive does not support this)
describe functional.alltypes
---- TYPES
string, string, string
---- RESULTS
'id','int','Add a comment'
'bool_col','boolean',''
@@ -18,15 +16,15 @@ string, string, string
'timestamp_col','timestamp',''
'year','int',''
'month','int',''
---- TYPES
string, string, string
====
---- QUERY
USE functional
====
---- QUERY
# Default database
describe alltypes
---- TYPES
string, string, string
---- RESULTS
'id','int','Add a comment'
'bool_col','boolean',''
@@ -41,26 +39,28 @@ string, string, string
'timestamp_col','timestamp',''
'year','int',''
'month','int',''
---- TYPES
string, string, string
====
---- QUERY
# Test printing of complex types.
describe functional.allcomplextypes
---- TYPES
string, string, string
---- RESULTS
'id','int',''
'int_array_col','array<int>',''
'array_array_col','array<array<int>>',''
'map_array_col','array<map<string,int>>',''
'struct_array_col','array<struct<f1:bigint,f2:string>>',''
'struct_array_col','array<struct<\n f1:bigint,\n f2:string\n>>',''
'int_map_col','map<string,int>',''
'array_map_col','map<string,array<int>>',''
'map_map_col','map<string,map<string,int>>',''
'struct_map_col','map<string,struct<f1:bigint,f2:string>>',''
'int_struct_col','struct<f1:int,f2:int>',''
'complex_struct_col','struct<f1:int,f2:array<int>,f3:map<string,int>>',''
'nested_struct_col','struct<f1:int,f2:struct<f11:bigint,f12:struct<f21:bigint>>>',''
'complex_nested_struct_col','struct<f1:int,f2:array<struct<f11:bigint,f12:map<string,struct<f21:bigint>>>>>',''
'struct_map_col','map<string,struct<\n f1:bigint,\n f2:string\n>>',''
'int_struct_col','struct<\n f1:int,\n f2:int\n>',''
'complex_struct_col','struct<\n f1:int,\n f2:array<int>,\n f3:map<string,int>\n>',''
'nested_struct_col','struct<\n f1:int,\n f2:struct<\n f11:bigint,\n f12:struct<\n f21:bigint\n >\n >\n>',''
'complex_nested_struct_col','struct<\n f1:int,\n f2:array<struct<\n f11:bigint,\n f12:map<string,struct<\n f21:bigint\n >>\n >>\n>',''
'year','int',''
'month','int',''
====
---- TYPES
string, string, string
====

0 comments on commit 34ae7ad

Please sign in to comment.