Skip to content

Commit

Permalink
DRILL-7361: Support MAP (DICT) type in schema file
Browse files Browse the repository at this point in the history
closes #1967
  • Loading branch information
arina-ielchiieva authored and ihuzenko committed Feb 12, 2020
1 parent 806760b commit 4b85785
Show file tree
Hide file tree
Showing 9 changed files with 333 additions and 75 deletions.
Expand Up @@ -708,8 +708,15 @@ public void testDescribeStatement() throws Exception {

String statement = "CREATE OR REPLACE SCHEMA \n"
+ "(\n"
+ "`col1` DATE FORMAT 'yyyy-MM-dd' DEFAULT '-1', \n"
+ "`col2` INT NOT NULL FORMAT 'yyyy-MM-dd' PROPERTIES { 'drill.strict' = 'true', 'some_column_prop' = 'some_column_val' }\n"
+ "`col_date` DATE FORMAT 'yyyy-MM-dd' DEFAULT '-1', \n"
+ "`col_int` INT NOT NULL FORMAT 'yyyy-MM-dd' PROPERTIES { 'drill.strict' = 'true', 'some_column_prop' = 'some_column_val' }, \n"
+ "`col_array_int` ARRAY<INT>, \n"
+ "`col_nested_array_int` ARRAY<ARRAY<INT>>, \n"
+ "`col_map_required` MAP<INT, VARCHAR NOT NULL>, \n"
+ "`col_map_optional` MAP<INT, VARCHAR>, \n"
+ "`col_map_array` ARRAY<MAP<INT, VARCHAR>>, \n"
+ "`col_struct` STRUCT<`s1` INT, `s2` VARCHAR NOT NULL>, \n"
+ "`col_struct_array` ARRAY<STRUCT<`s1` INT, `s2` VARCHAR NOT NULL>>\n"
+ ") \n"
+ "FOR TABLE dfs.tmp.`table_describe_statement` \n"
+ "PROPERTIES (\n"
Expand Down
Expand Up @@ -31,14 +31,16 @@ columns: column_def (COMMA column_def)*;

column_def: column property_values?;

column: (primitive_column | struct_column | simple_array_column | complex_array_column);
column: (primitive_column | struct_column | map_column | simple_array_column | complex_array_column);

primitive_column: column_id simple_type nullability? format_value? default_value?;

simple_array_column: column_id simple_array_type nullability?;

struct_column: column_id struct_type nullability?;

map_column: column_id map_type nullability?;

complex_array_column: column_id complex_array_type nullability?;

column_id
Expand All @@ -63,14 +65,37 @@ simple_type
| INTERVAL # interval
;

complex_type: (simple_array_type | complex_array_type);
array_type: (simple_array_type | complex_array_type);

simple_array_type: ARRAY LEFT_ANGLE_BRACKET simple_array_value_type RIGHT_ANGLE_BRACKET;

simple_array_type: ARRAY LEFT_ANGLE_BRACKET (simple_type | struct_type) RIGHT_ANGLE_BRACKET;
// Element types allowed directly inside a simple (non-nested) ARRAY<...>:
// a simple type, a STRUCT or a MAP. Each alternative carries its own label.
simple_array_value_type
: simple_type # array_simple_type_def
| struct_type # array_struct_type_def
| map_type # array_map_type_def
;

complex_array_type: ARRAY LEFT_ANGLE_BRACKET complex_type RIGHT_ANGLE_BRACKET;
complex_array_type: ARRAY LEFT_ANGLE_BRACKET array_type RIGHT_ANGLE_BRACKET;

struct_type: STRUCT LEFT_ANGLE_BRACKET columns RIGHT_ANGLE_BRACKET;

// MAP type: a key definition and a value definition, separated by a comma, inside angle brackets.
map_type: MAP LEFT_ANGLE_BRACKET map_key_type_def COMMA map_value_type_def RIGHT_ANGLE_BRACKET;

// MAP key definition: the key type, optionally followed by NOT NULL.
map_key_type_def: map_key_type nullability?;

// MAP key type: only simple types are accepted as keys.
map_key_type
: simple_type # map_key_simple_type_def
;

// MAP value definition: the value type, optionally followed by NOT NULL.
map_value_type_def: map_value_type nullability?;

// MAP value type: a simple type, a STRUCT, a nested MAP, or an array (simple or complex).
map_value_type
: simple_type # map_value_simple_type_def
| struct_type # map_value_struct_type_def
| map_type # map_value_map_type_def
| array_type # map_value_array_type_def
;

nullability: NOT NULL;

format_value: FORMAT string_value;
Expand Down
Expand Up @@ -22,8 +22,6 @@
import org.apache.drill.common.types.TypeProtos.MinorType;
import org.apache.drill.exec.record.MaterializedField;

import java.util.stream.Collectors;

/**
* Describes a base column type for map, dict, repeated map and repeated dict. All are tuples that have a tuple
* schema as part of the column definition.
Expand Down Expand Up @@ -100,43 +98,29 @@ public TupleMetadata parentTuple() {
@Override
public MaterializedField schema() {
MaterializedField field = emptySchema();
for (MaterializedField member : schema.toFieldList()) {
field.addChild(member);
}
schema.toFieldList().forEach(field::addChild);
return field;
}

@Override
public MaterializedField emptySchema() {
return MaterializedField.create(name,
MajorType.newBuilder()
.setMinorType(type)
.setMode(mode)
.build());
MajorType.newBuilder()
.setMinorType(type)
.setMode(mode)
.build());
}

@Override
public String typeString() {
StringBuilder builder = new StringBuilder();
if (isArray()) {
builder.append("ARRAY<");
}
builder.append(getStringType())
.append("<").append(
tupleSchema().toMetadataList().stream()
.map(ColumnMetadata::columnString)
.collect(Collectors.joining(", "))
)
.append(">");
if (isArray()) {
builder.append(">");
}
return builder.toString();
String typeString = internalTypeString();
return isArray() ? "ARRAY<" + typeString + ">" : typeString;
}

/**
* Returns string representation of type like {@code "STRUCT"} or {@code "MAP"}
* @return column type
* Returns the type string built by the concrete subclass, without the {@code ARRAY<...>} wrapper that {@code typeString()} adds for array columns.
*
* @return type string representation
*/
protected abstract String getStringType();
protected abstract String internalTypeString();
}
Expand Up @@ -21,6 +21,8 @@
import org.apache.drill.exec.record.MaterializedField;
import org.apache.drill.exec.vector.complex.DictVector;

import java.util.stream.Collectors;

public class DictColumnMetadata extends AbstractMapColumnMetadata {

/**
Expand Down Expand Up @@ -79,8 +81,32 @@ public boolean isDict() {
}

@Override
protected String getStringType() {
return "MAP";
protected String internalTypeString() {
  // Renders this dict column as MAP<keyType, valueType[ NOT NULL]>.
  ColumnMetadata keyMetadata = keyColumnMetadata();
  ColumnMetadata valueMetadata = valueColumnMetadata();

  String content;
  if (keyMetadata != null && valueMetadata != null) {
    String keyType = keyMetadata.typeString();
    String valueType = valueMetadata.typeString();
    // only a required value is marked explicitly; other modes add nothing
    String nullability = TypeProtos.DataMode.REQUIRED == valueMetadata.mode() ? " NOT NULL" : "";
    content = keyType + ", " + valueType + nullability;
  } else {
    // The key and value columns may be added after this metadata object is created.
    // If the type string is requested before both are present (for example, while
    // reporting an error), list whatever member columns have been added so far.
    content = tupleSchema().toMetadataList().stream()
      .map(ColumnMetadata::columnString)
      .collect(Collectors.joining(", "));
  }

  return "MAP<" + content + ">";
}

@Override
Expand Down
Expand Up @@ -21,6 +21,8 @@
import org.apache.drill.common.types.TypeProtos.MinorType;
import org.apache.drill.exec.record.MaterializedField;

import java.util.stream.Collectors;

/**
* Describes a map and repeated map. Both are tuples that have a tuple
* schema as part of the column definition.
Expand Down Expand Up @@ -71,7 +73,11 @@ public boolean isMap() {
}

@Override
protected String getStringType() {
return "STRUCT";
protected String internalTypeString() {
  // Renders this column as STRUCT<...>, listing each member column's
  // columnString() separated by ", "; an empty tuple yields "STRUCT<>".
  return tupleSchema().toMetadataList().stream()
    .map(ColumnMetadata::columnString)
    .collect(Collectors.joining(", ", "STRUCT<", ">"));
}
}

0 comments on commit 4b85785

Please sign in to comment.