From ea6aeb38075fb3684ec33dacdf6d7769ef6c7e5f Mon Sep 17 00:00:00 2001 From: Hyunsik Choi Date: Fri, 20 Feb 2015 17:56:06 -0800 Subject: [PATCH 01/12] TAJO-1353: CREATE TABLE should support the nested record definition. --- .../apache/tajo/algebra/ColumnDefinition.java | 10 +- .../org/apache/tajo/algebra/DataTypeExpr.java | 20 +- .../org/apache/tajo/datum/StructDatum.java | 57 ++++++ tajo-common/src/main/proto/DataTypes.proto | 8 +- .../org/apache/tajo/engine/parser/SQLLexer.g4 | 1 + .../apache/tajo/engine/parser/SQLParser.g4 | 14 +- .../tajo/engine/parser/SQLAnalyzer.java | 189 +++++++++++------- .../tajo/engine/parser/TestSQLAnalyzer.java | 10 + .../TestSQLAnalyzer/create_table_nested_1.sql | 1 + .../TestSQLAnalyzer/create_table_nested_2.sql | 1 + .../create_table_nested_1.result | 40 ++++ .../create_table_nested_2.result | 57 ++++++ 12 files changed, 330 insertions(+), 78 deletions(-) create mode 100644 tajo-common/src/main/java/org/apache/tajo/datum/StructDatum.java create mode 100644 tajo-core/src/test/resources/queries/TestSQLAnalyzer/create_table_nested_1.sql create mode 100644 tajo-core/src/test/resources/queries/TestSQLAnalyzer/create_table_nested_2.sql create mode 100644 tajo-core/src/test/resources/results/TestSQLAnalyzer/create_table_nested_1.result create mode 100644 tajo-core/src/test/resources/results/TestSQLAnalyzer/create_table_nested_2.result diff --git a/tajo-algebra/src/main/java/org/apache/tajo/algebra/ColumnDefinition.java b/tajo-algebra/src/main/java/org/apache/tajo/algebra/ColumnDefinition.java index e6e05d4cb0..80ecac4474 100644 --- a/tajo-algebra/src/main/java/org/apache/tajo/algebra/ColumnDefinition.java +++ b/tajo-algebra/src/main/java/org/apache/tajo/algebra/ColumnDefinition.java @@ -31,13 +31,21 @@ public ColumnDefinition(String columnName, String dataType) { public ColumnDefinition(String columnName, DataTypeExpr dataType) { super(dataType.getTypeName()); + + this.columnName = columnName; + + // precision and scale if (dataType.hasLengthOrPrecision()) { setLengthOrPrecision(dataType.lengthOrPrecision); if (dataType.hasScale()) { setScale(dataType.scale); } } - this.columnName = columnName; + + // nested records + if (dataType.isNestedRecordType()) { + this.nestedRecord = dataType.nestedRecord; + } } public String getColumnName() { diff --git a/tajo-algebra/src/main/java/org/apache/tajo/algebra/DataTypeExpr.java b/tajo-algebra/src/main/java/org/apache/tajo/algebra/DataTypeExpr.java index 9dc795b216..309c88b5eb 100644 --- a/tajo-algebra/src/main/java/org/apache/tajo/algebra/DataTypeExpr.java +++ b/tajo-algebra/src/main/java/org/apache/tajo/algebra/DataTypeExpr.java @@ -21,6 +21,8 @@ import com.google.common.base.Objects; import com.google.gson.annotations.Expose; import com.google.gson.annotations.SerializedName; +import org.apache.tajo.common.TajoDataTypes; +import org.apache.tajo.common.TajoDataTypes.Type; import org.apache.tajo.util.TUtil; public class DataTypeExpr extends Expr { @@ -30,16 +32,30 @@ public class DataTypeExpr extends Expr { Integer lengthOrPrecision; @Expose @SerializedName("Scale") Integer scale; + @Expose @SerializedName("Struct") + DataTypeExpr [] nestedRecord; public DataTypeExpr(String typeName) { super(OpType.DataType); this.typeName = typeName; } + public DataTypeExpr(DataTypeExpr [] nestedRecordTypes) { + super(OpType.DataType); + // Please refer to DataTypes.proto. 'STRUCT' must be equivalent to Enum type in DataTypes.proto. + // STRUCT = 51; + this.typeName = Type.STRUCT.name(); + this.nestedRecord = nestedRecordTypes; + } + public String getTypeName() { return this.typeName; } + public boolean isNestedRecordType() { + return this.typeName.equals(Type.STRUCT.name()); + } + public boolean hasLengthOrPrecision() { return lengthOrPrecision != null; } @@ -74,7 +90,8 @@ boolean equalsTo(Expr expr) { DataTypeExpr another = (DataTypeExpr) expr; return typeName.equals(another.typeName) && TUtil.checkEquals(lengthOrPrecision, another.lengthOrPrecision) && - TUtil.checkEquals(scale, another.scale); + TUtil.checkEquals(scale, another.scale) && + TUtil.checkEquals(nestedRecord, another.nestedRecord); } @Override @@ -83,6 +100,7 @@ public Object clone() throws CloneNotSupportedException { dataType.typeName = typeName; dataType.lengthOrPrecision = lengthOrPrecision; dataType.scale = scale; + dataType.nestedRecord = nestedRecord; return dataType; } } diff --git a/tajo-common/src/main/java/org/apache/tajo/datum/StructDatum.java b/tajo-common/src/main/java/org/apache/tajo/datum/StructDatum.java new file mode 100644 index 0000000000..241cad8b9e --- /dev/null +++ b/tajo-common/src/main/java/org/apache/tajo/datum/StructDatum.java @@ -0,0 +1,57 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tajo.datum; + +import org.apache.tajo.common.TajoDataTypes.Type; +import org.apache.tajo.exception.InvalidOperationException; + +public class StructDatum extends Datum { + private Datum [] values; + + public StructDatum(Datum [] values) { + super(Type.STRUCT); + this.values = values; + } + + @Override + public int size() { + return 0; + } + + @Override + public int compareTo(Datum datum) { + if (datum instanceof StructDatum) { + StructDatum other = (StructDatum) datum; + int min = Math.min(values.length, other.values.length); + + for (int i = 0; i < min; i++) { + int compVal = values[i].compareTo(other.values[i]); + if (compVal != 0) { + return compVal; + } + } + + // the narrow width is regarded as lower one. + return values.length - other.values.length; + + } else { + throw new InvalidOperationException(datum.type()); + } + } +} diff --git a/tajo-common/src/main/proto/DataTypes.proto b/tajo-common/src/main/proto/DataTypes.proto index fb9c95725a..213e1a7223 100644 --- a/tajo-common/src/main/proto/DataTypes.proto +++ b/tajo-common/src/main/proto/DataTypes.proto @@ -59,9 +59,11 @@ enum Type { VARBINARY = 44; // variable-width binary strings BLOB = 45; - ANY = 51; // Any type - UDT = 52; // user-defined function - PROTOBUF = 53; // protocol buffer type + STRUCT = 51; // nested structure type + + ANY = 61; // Any type + UDT = 62; // user-defined function + PROTOBUF = 63; // protocol buffer type INET4 = 91; INET6 = 92; diff --git a/tajo-core/src/main/antlr4/org/apache/tajo/engine/parser/SQLLexer.g4 b/tajo-core/src/main/antlr4/org/apache/tajo/engine/parser/SQLLexer.g4 index 0c144f7bab..a710f5b6bd 100644 --- a/tajo-core/src/main/antlr4/org/apache/tajo/engine/parser/SQLLexer.g4 +++ b/tajo-core/src/main/antlr4/org/apache/tajo/engine/parser/SQLLexer.g4 @@ -299,6 +299,7 @@ SET : S E T; SIMILAR : S I M I L A R; STDDEV_POP : S T D D E V UNDERLINE P O P; STDDEV_SAMP : S T D D E V UNDERLINE S A M P; +STRUCT : S T R U C T; SUBPARTITION : S U B P A R T I T I O N; SUM : S U M; diff --git a/tajo-core/src/main/antlr4/org/apache/tajo/engine/parser/SQLParser.g4 b/tajo-core/src/main/antlr4/org/apache/tajo/engine/parser/SQLParser.g4 index a236514fcb..0f711dd046 100644 --- a/tajo-core/src/main/antlr4/org/apache/tajo/engine/parser/SQLParser.g4 +++ b/tajo-core/src/main/antlr4/org/apache/tajo/engine/parser/SQLParser.g4 @@ -309,6 +309,7 @@ nonreserved_keywords | SIMILAR | STDDEV_POP | STDDEV_SAMP + | STRUCT | SUBPARTITION | SUM | TABLESPACE @@ -430,10 +431,7 @@ predefined_type | bit_type | binary_type | network_type - ; - -network_type - : INET4 + | struct_type ; character_string_type @@ -525,6 +523,14 @@ binary_type | VARBINARY type_length? ; +network_type + : INET4 + ; + +struct_type + : STRUCT table_elements + ; + /* =============================================================================== 6.3 diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/parser/SQLAnalyzer.java b/tajo-core/src/main/java/org/apache/tajo/engine/parser/SQLAnalyzer.java index ab8e647e78..cbac0e48de 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/parser/SQLAnalyzer.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/parser/SQLAnalyzer.java @@ -1288,7 +1288,10 @@ private ColumnDefinition[] getDefinitions(SQLParser.Table_elementsContext ctx) { ColumnDefinition[] elements = new ColumnDefinition[size]; for (int i = 0; i < size; i++) { String name = ctx.field_element(i).name.getText(); + + String dataTypeName = ctx.field_element(i).field_type().data_type().getText(); DataTypeExpr typeDef = visitData_type(ctx.field_element(i).field_type().data_type()); + Preconditions.checkNotNull(typeDef, dataTypeName + " is not handled correctly"); elements[i] = new ColumnDefinition(name, typeDef); } @@ -1359,12 +1362,17 @@ public DataTypeExpr visitData_type(SQLParser.Data_typeContext ctx) { SQLParser.Predefined_typeContext predefined_type = ctx.predefined_type(); DataTypeExpr typeDefinition = null; - if (predefined_type.character_string_type() != null) { - SQLParser.Character_string_typeContext character_string_type = - predefined_type.character_string_type(); - if ((character_string_type.CHARACTER() != null || character_string_type.CHAR() != null) && - character_string_type.VARYING() == null) { + // CHAR -> FIXED CHAR + // |- VARYING CHAR + // TEXT + if (checkIfExist(predefined_type.character_string_type())) { + + SQLParser.Character_string_typeContext character_string_type = predefined_type.character_string_type(); + + + if ((checkIfExist(character_string_type.CHARACTER()) || checkIfExist(character_string_type.CHAR())) && + !checkIfExist(character_string_type.VARYING())) { typeDefinition = new DataTypeExpr(Type.CHAR.name()); @@ -1373,8 +1381,7 @@ public DataTypeExpr visitData_type(SQLParser.Data_typeContext ctx) { Integer.parseInt(character_string_type.type_length().NUMBER().getText())); } - } else if (character_string_type.VARCHAR() != null - || character_string_type.VARYING() != null) { + } else if (checkIfExist(character_string_type.VARCHAR()) || checkIfExist(character_string_type.VARYING())) { typeDefinition = new DataTypeExpr(Type.VARCHAR.name()); @@ -1383,115 +1390,159 @@ public DataTypeExpr visitData_type(SQLParser.Data_typeContext ctx) { Integer.parseInt(character_string_type.type_length().NUMBER().getText())); } - } else if (character_string_type.TEXT() != null) { + } else if (checkIfExist(character_string_type.TEXT())) { typeDefinition = new DataTypeExpr(Type.TEXT.name()); } - } else if (predefined_type.national_character_string_type() != null) { - SQLParser.National_character_string_typeContext nchar_type = - predefined_type.national_character_string_type(); - if ((nchar_type.CHAR() != null || nchar_type.CHARACTER() != null - || nchar_type.NCHAR() != null) && nchar_type.VARYING() == null) { + // NCHAR + } else if (checkIfExist(predefined_type.national_character_string_type())) { + + National_character_string_typeContext nchar_type = predefined_type.national_character_string_type(); + + if ((checkIfExist(nchar_type.CHAR()) || checkIfExist(nchar_type.CHARACTER()) || + checkIfExist(nchar_type.NCHAR()) && !checkIfExist(nchar_type.VARYING()))) { + typeDefinition = new DataTypeExpr(Type.NCHAR.name()); - } else if (nchar_type.NVARCHAR() != null || nchar_type.VARYING() != null) { + + } else if (checkIfExist(nchar_type.NVARCHAR()) || checkIfExist(nchar_type.VARYING())) { + typeDefinition = new DataTypeExpr(Type.NVARCHAR.name()); } - if (nchar_type.type_length() != null) { - typeDefinition.setLengthOrPrecision( - Integer.parseInt(nchar_type.type_length().NUMBER().getText())); + // if a length is given + if (checkIfExist(nchar_type.type_length())) { + typeDefinition.setLengthOrPrecision(Integer.parseInt(nchar_type.type_length().NUMBER().getText())); } - } else if (predefined_type.binary_large_object_string_type() != null) { - SQLParser.Binary_large_object_string_typeContext blob_type = - predefined_type.binary_large_object_string_type(); + // BLOB types + } else if (checkIfExist(predefined_type.binary_large_object_string_type())) { + + Binary_large_object_string_typeContext blob_type = predefined_type.binary_large_object_string_type(); + typeDefinition = new DataTypeExpr(Type.BLOB.name()); - if (blob_type.type_length() != null) { - typeDefinition.setLengthOrPrecision( - Integer.parseInt(blob_type.type_length().NUMBER().getText())); + + if (checkIfExist(blob_type.type_length())) { + typeDefinition.setLengthOrPrecision(Integer.parseInt(blob_type.type_length().NUMBER().getText())); } - } else if (predefined_type.numeric_type() != null) { + + // NUMERIC types + } else if (checkIfExist(predefined_type.numeric_type())) { // exact number - if (predefined_type.numeric_type().exact_numeric_type() != null) { - SQLParser.Exact_numeric_typeContext exactType = - predefined_type.numeric_type().exact_numeric_type(); - if (exactType.TINYINT() != null || exactType.INT1() != null) { + if (checkIfExist(predefined_type.numeric_type().exact_numeric_type())) { + + Exact_numeric_typeContext exactType = predefined_type.numeric_type().exact_numeric_type(); + + if (checkIfExist(exactType.TINYINT()) || checkIfExist(exactType.INT1())) { typeDefinition = new DataTypeExpr(Type.INT1.name()); - } else if (exactType.INT2() != null || exactType.SMALLINT() != null) { + + } else if (checkIfExist(exactType.INT2()) || checkIfExist(exactType.SMALLINT())) { typeDefinition = new DataTypeExpr(Type.INT2.name()); - } else if (exactType.INT4() != null || exactType.INTEGER() != null || - exactType.INT() != null) { + + } else if (checkIfExist(exactType.INT4()) || + checkIfExist(exactType.INTEGER()) || + checkIfExist(exactType.INT())) { typeDefinition = new DataTypeExpr(Type.INT4.name()); - } else if (exactType.INT8() != null || exactType.BIGINT() != null) { + + } else if (checkIfExist(exactType.INT8()) || checkIfExist(exactType.BIGINT()) ) { typeDefinition = new DataTypeExpr(Type.INT8.name()); - } else if (exactType.NUMERIC() != null) { - typeDefinition = new DataTypeExpr(Type.NUMERIC.name()); - } else if (exactType.DECIMAL() != null || exactType.DEC() != null) { + + } else if (checkIfExist(exactType.NUMERIC()) || + checkIfExist(exactType.DECIMAL()) || + checkIfExist(exactType.DEC())) { typeDefinition = new DataTypeExpr(Type.NUMERIC.name()); - } - if (typeDefinition.getTypeName().equals(Type.NUMERIC.name())) { - if (exactType.precision_param() != null) { - if (exactType.precision_param().scale != null) { - typeDefinition.setScale( - Integer.parseInt(exactType.precision_param().scale.getText())); + if (checkIfExist(exactType.precision_param())) { + typeDefinition.setLengthOrPrecision(Integer.parseInt(exactType.precision_param().precision.getText())); + + if (checkIfExist(exactType.precision_param().scale)) { + typeDefinition.setScale(Integer.parseInt(exactType.precision_param().scale.getText())); } - typeDefinition.setLengthOrPrecision( - Integer.parseInt(exactType.precision_param().precision.getText())); } } + + } else { // approximate number - SQLParser.Approximate_numeric_typeContext approximateType = - predefined_type.numeric_type().approximate_numeric_type(); - if (approximateType.FLOAT() != null || approximateType.FLOAT4() != null - || approximateType.REAL() != null) { + Approximate_numeric_typeContext approximateType = predefined_type.numeric_type().approximate_numeric_type(); + if (checkIfExist(approximateType.FLOAT()) || + checkIfExist(approximateType.FLOAT4()) || + checkIfExist(approximateType.REAL())) { typeDefinition = new DataTypeExpr(Type.FLOAT4.name()); - } else if (approximateType.FLOAT8() != null || approximateType.DOUBLE() != null) { + + } else if (checkIfExist(approximateType.FLOAT8()) || checkIfExist(approximateType.DOUBLE())) { typeDefinition = new DataTypeExpr(Type.FLOAT8.name()); } } - } else if (predefined_type.boolean_type() != null) { + + } else if (checkIfExist(predefined_type.boolean_type())) { typeDefinition = new DataTypeExpr(Type.BOOLEAN.name()); - } else if (predefined_type.datetime_type() != null) { - SQLParser.Datetime_typeContext dateTimeType = predefined_type.datetime_type(); - if (dateTimeType.DATE() != null) { + + } else if (checkIfExist(predefined_type.datetime_type())) { + + Datetime_typeContext dateTimeType = predefined_type.datetime_type(); + if (checkIfExist(dateTimeType.DATE())) { typeDefinition = new DataTypeExpr(Type.DATE.name()); - } else if (dateTimeType.TIME(0) != null && dateTimeType.ZONE() == null) { - typeDefinition = new DataTypeExpr(Type.TIME.name()); - } else if ((dateTimeType.TIME(0) != null && dateTimeType.ZONE() != null) || - dateTimeType.TIMETZ() != null) { + + } else if (checkIfExist(dateTimeType.TIME(0))) { + if (checkIfExist(dateTimeType.ZONE())) { + typeDefinition = new DataTypeExpr(Type.TIMEZ.name()); + } else { + typeDefinition = new DataTypeExpr(Type.TIME.name()); + } + + } else if (checkIfExist(dateTimeType.TIMETZ())) { typeDefinition = new DataTypeExpr(Type.TIMEZ.name()); - } else if (dateTimeType.TIMESTAMP() != null && dateTimeType.ZONE() == null) { - typeDefinition = new DataTypeExpr(Type.TIMESTAMP.name()); - } else if ((dateTimeType.TIMESTAMP() != null && dateTimeType.ZONE() != null) || - dateTimeType.TIMESTAMPTZ() != null) { + + } else if (checkIfExist(dateTimeType.TIMESTAMP())) { + if (checkIfExist(dateTimeType.ZONE())) { + typeDefinition = new DataTypeExpr(Type.TIMESTAMPZ.name()); + } else { + typeDefinition = new DataTypeExpr(Type.TIMESTAMP.name()); + } + + } else if (checkIfExist(dateTimeType.TIMESTAMPTZ())) { typeDefinition = new DataTypeExpr(Type.TIMESTAMPZ.name()); } + + // bit data types } else if (predefined_type.bit_type() != null) { - SQLParser.Bit_typeContext bitType = predefined_type.bit_type(); - if (bitType.VARBIT() != null || bitType.VARYING() != null) { + Bit_typeContext bitType = predefined_type.bit_type(); + + if (checkIfExist(bitType.VARBIT()) || checkIfExist(bitType.VARYING())) { typeDefinition = new DataTypeExpr(Type.VARBIT.name()); + } else { typeDefinition = new DataTypeExpr(Type.BIT.name()); } - if (bitType.type_length() != null) { + + if (checkIfExist(bitType.type_length())) { typeDefinition.setLengthOrPrecision( Integer.parseInt(bitType.type_length().NUMBER().getText())); } - } else if (predefined_type.binary_type() != null) { + + + // binary data types + } else if (checkIfExist(predefined_type.binary_type())) { SQLParser.Binary_typeContext binaryType = predefined_type.binary_type(); - if (binaryType.VARBINARY() != null || binaryType.VARYING() != null) { + + if (checkIfExist(binaryType.VARBINARY()) || checkIfExist(binaryType.VARYING())) { typeDefinition = new DataTypeExpr(Type.VARBINARY.name()); } else { typeDefinition = new DataTypeExpr(Type.BINARY.name()); } - if (binaryType.type_length() != null) { - typeDefinition.setLengthOrPrecision( - Integer.parseInt(binaryType.type_length().NUMBER().getText())); + + if (checkIfExist(binaryType.type_length())) { + typeDefinition.setLengthOrPrecision(Integer.parseInt(binaryType.type_length().NUMBER().getText())); } - } else if (predefined_type.network_type() != null) { + + // inet + } else if (checkIfExist(predefined_type.network_type())) { typeDefinition = new DataTypeExpr(Type.INET4.name()); + + + } else if (checkIfExist(predefined_type.struct_type())) { + ColumnDefinition [] nestedRecordDefines = getDefinitions(predefined_type.struct_type().table_elements()); + typeDefinition = new DataTypeExpr(nestedRecordDefines); } return typeDefinition; diff --git a/tajo-core/src/test/java/org/apache/tajo/engine/parser/TestSQLAnalyzer.java b/tajo-core/src/test/java/org/apache/tajo/engine/parser/TestSQLAnalyzer.java index 9dfe814ff7..d223554876 100644 --- a/tajo-core/src/test/java/org/apache/tajo/engine/parser/TestSQLAnalyzer.java +++ b/tajo-core/src/test/java/org/apache/tajo/engine/parser/TestSQLAnalyzer.java @@ -710,4 +710,14 @@ public void testSetSession6() throws IOException { public void testSetSession7() throws IOException { assertParseResult("setsession7.sql", "setsession7.result"); } + + @Test + public void testCreateTableWithNested1() throws IOException { + assertParseResult("create_table_nested_1.sql", "create_table_nested_1.result"); + } + + @Test + public void testCreateTableWithNested2() throws IOException { + assertParseResult("create_table_nested_2.sql", "create_table_nested_2.result"); + } } diff --git a/tajo-core/src/test/resources/queries/TestSQLAnalyzer/create_table_nested_1.sql b/tajo-core/src/test/resources/queries/TestSQLAnalyzer/create_table_nested_1.sql new file mode 100644 index 0000000000..3942171f3e --- /dev/null +++ b/tajo-core/src/test/resources/queries/TestSQLAnalyzer/create_table_nested_1.sql @@ -0,0 +1 @@ +CREATE TABLE T1 (A TEXT, B INT4, C STRUCT (D TEXT, E INT8), F FLOAT8); \ No newline at end of file diff --git a/tajo-core/src/test/resources/queries/TestSQLAnalyzer/create_table_nested_2.sql b/tajo-core/src/test/resources/queries/TestSQLAnalyzer/create_table_nested_2.sql new file mode 100644 index 0000000000..9eaa00717a --- /dev/null +++ b/tajo-core/src/test/resources/queries/TestSQLAnalyzer/create_table_nested_2.sql @@ -0,0 +1 @@ +CREATE TABLE T1 (A TEXT, B INT4, C STRUCT (D TEXT, E INT8, F STRUCT (G INT1, H FLOAT4)), Z FLOAT8); \ No newline at end of file diff --git a/tajo-core/src/test/resources/results/TestSQLAnalyzer/create_table_nested_1.result b/tajo-core/src/test/resources/results/TestSQLAnalyzer/create_table_nested_1.result new file mode 100644 index 0000000000..4223a88663 --- /dev/null +++ b/tajo-core/src/test/resources/results/TestSQLAnalyzer/create_table_nested_1.result @@ -0,0 +1,40 @@ +{ + "IsExternal": false, + "TableName": "t1", + "Attributes": [ + { + "ColumnDefName": "a", + "DataTypeName": "TEXT", + "OpType": "DataType" + }, + { + "ColumnDefName": "b", + "DataTypeName": "INT4", + "OpType": "DataType" + }, + { + "ColumnDefName": "c", + "DataTypeName": "STRUCT", + "Struct": [ + { + "ColumnDefName": "d", + "DataTypeName": "TEXT", + "OpType": "DataType" + }, + { + "ColumnDefName": "e", + "DataTypeName": "INT8", + "OpType": "DataType" + } + ], + "OpType": "DataType" + }, + { + "ColumnDefName": "f", + "DataTypeName": "FLOAT8", + "OpType": "DataType" + } + ], + "IfNotExists": false, + "OpType": "CreateTable" +} \ No newline at end of file diff --git a/tajo-core/src/test/resources/results/TestSQLAnalyzer/create_table_nested_2.result b/tajo-core/src/test/resources/results/TestSQLAnalyzer/create_table_nested_2.result new file mode 100644 index 0000000000..af60276aa5 --- /dev/null +++ b/tajo-core/src/test/resources/results/TestSQLAnalyzer/create_table_nested_2.result @@ -0,0 +1,57 @@ +{ + "IsExternal": false, + "TableName": "t1", + "Attributes": [ + { + "ColumnDefName": "a", + "DataTypeName": "TEXT", + "OpType": "DataType" + }, + { + "ColumnDefName": "b", + "DataTypeName": "INT4", + "OpType": "DataType" + }, + { + "ColumnDefName": "c", + "DataTypeName": "STRUCT", + "Struct": [ + { + "ColumnDefName": "d", + "DataTypeName": "TEXT", + "OpType": "DataType" + }, + { + "ColumnDefName": "e", + "DataTypeName": "INT8", + "OpType": "DataType" + }, + { + "ColumnDefName": "f", + "DataTypeName": "STRUCT", + "Struct": [ + { + "ColumnDefName": "g", + "DataTypeName": "INT1", + "OpType": "DataType" + }, + { + "ColumnDefName": "h", + "DataTypeName": "FLOAT4", + "OpType": "DataType" + } + ], + "OpType": "DataType" + } + ], + "OpType": "DataType" + }, + { + "ColumnDefName": "z", + "DataTypeName": "FLOAT8", + "OpType": "DataType" + } + ], + "IfNotExists": false, + "OpType": "CreateTable" +} \ No newline at end of file From e3ad19f461739d5cc20b7d0b0cfac0f7f16ee2e5 Mon Sep 17 00:00:00 2001 From: Hyunsik Choi Date: Fri, 20 Feb 2015 18:27:06 -0800 Subject: [PATCH 02/12] Changed the type name 'struct' to 'record'. --- .../org/apache/tajo/algebra/DataTypeExpr.java | 6 +- .../org/apache/tajo/datum/StructDatum.java | 57 ------------------- tajo-common/src/main/proto/DataTypes.proto | 2 +- .../org/apache/tajo/engine/parser/SQLLexer.g4 | 2 +- .../apache/tajo/engine/parser/SQLParser.g4 | 8 +-- .../tajo/engine/parser/SQLAnalyzer.java | 4 +- .../TestSQLAnalyzer/create_table_nested_1.sql | 2 +- .../TestSQLAnalyzer/create_table_nested_2.sql | 2 +- .../create_table_nested_1.result | 4 +- .../create_table_nested_2.result | 8 +-- 10 files changed, 19 insertions(+), 76 deletions(-) delete mode 100644 tajo-common/src/main/java/org/apache/tajo/datum/StructDatum.java diff --git a/tajo-algebra/src/main/java/org/apache/tajo/algebra/DataTypeExpr.java b/tajo-algebra/src/main/java/org/apache/tajo/algebra/DataTypeExpr.java index 309c88b5eb..28f9a5e0da 100644 --- a/tajo-algebra/src/main/java/org/apache/tajo/algebra/DataTypeExpr.java +++ b/tajo-algebra/src/main/java/org/apache/tajo/algebra/DataTypeExpr.java @@ -32,7 +32,7 @@ public class DataTypeExpr extends Expr { Integer lengthOrPrecision; @Expose @SerializedName("Scale") Integer scale; - @Expose @SerializedName("Struct") + @Expose @SerializedName("Record") DataTypeExpr [] nestedRecord; public DataTypeExpr(String typeName) { @@ -44,7 +44,7 @@ public DataTypeExpr(DataTypeExpr [] nestedRecordTypes) { super(OpType.DataType); // Please refer to DataTypes.proto. 'STRUCT' must be equivalent to Enum type in DataTypes.proto. // STRUCT = 51; - this.typeName = Type.STRUCT.name(); + this.typeName = Type.RECORD.name(); this.nestedRecord = nestedRecordTypes; } @@ -53,7 +53,7 @@ public String getTypeName() { } public boolean isNestedRecordType() { - return this.typeName.equals(Type.STRUCT.name()); + return this.typeName.equals(Type.RECORD.name()); } public boolean hasLengthOrPrecision() { diff --git a/tajo-common/src/main/java/org/apache/tajo/datum/StructDatum.java b/tajo-common/src/main/java/org/apache/tajo/datum/StructDatum.java deleted file mode 100644 index 241cad8b9e..0000000000 --- a/tajo-common/src/main/java/org/apache/tajo/datum/StructDatum.java +++ /dev/null @@ -1,57 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.tajo.datum; - -import org.apache.tajo.common.TajoDataTypes.Type; -import org.apache.tajo.exception.InvalidOperationException; - -public class StructDatum extends Datum { - private Datum [] values; - - public StructDatum(Datum [] values) { - super(Type.STRUCT); - this.values = values; - } - - @Override - public int size() { - return 0; - } - - @Override - public int compareTo(Datum datum) { - if (datum instanceof StructDatum) { - StructDatum other = (StructDatum) datum; - int min = Math.min(values.length, other.values.length); - - for (int i = 0; i < min; i++) { - int compVal = values[i].compareTo(other.values[i]); - if (compVal != 0) { - return compVal; - } - } - - // the narrow width is regarded as lower one. - return values.length - other.values.length; - - } else { - throw new InvalidOperationException(datum.type()); - } - } -} diff --git a/tajo-common/src/main/proto/DataTypes.proto b/tajo-common/src/main/proto/DataTypes.proto index 213e1a7223..04f1e12439 100644 --- a/tajo-common/src/main/proto/DataTypes.proto +++ b/tajo-common/src/main/proto/DataTypes.proto @@ -59,7 +59,7 @@ enum Type { VARBINARY = 44; // variable-width binary strings BLOB = 45; - STRUCT = 51; // nested structure type + RECORD = 51; // nested structure type ANY = 61; // Any type UDT = 62; // user-defined function diff --git a/tajo-core/src/main/antlr4/org/apache/tajo/engine/parser/SQLLexer.g4 b/tajo-core/src/main/antlr4/org/apache/tajo/engine/parser/SQLLexer.g4 index a710f5b6bd..f42e114704 100644 --- a/tajo-core/src/main/antlr4/org/apache/tajo/engine/parser/SQLLexer.g4 +++ b/tajo-core/src/main/antlr4/org/apache/tajo/engine/parser/SQLLexer.g4 @@ -284,6 +284,7 @@ QUARTER : Q U A R T E R; RANGE : R A N G E; RANK : R A N K; +RECORD : R E C O R D; REGEXP : R E G E X P; RENAME : R E N A M E; RESET : R E S E T; @@ -299,7 +300,6 @@ SET : S E T; SIMILAR : S I M I L A R; STDDEV_POP : S T D D E V UNDERLINE P O P; STDDEV_SAMP : S T D D E V UNDERLINE S A M P; -STRUCT : S T R U C T; SUBPARTITION : S U B P A R T I T I O N; SUM : S U M; diff --git a/tajo-core/src/main/antlr4/org/apache/tajo/engine/parser/SQLParser.g4 b/tajo-core/src/main/antlr4/org/apache/tajo/engine/parser/SQLParser.g4 index 0f711dd046..420bf46e6e 100644 --- a/tajo-core/src/main/antlr4/org/apache/tajo/engine/parser/SQLParser.g4 +++ b/tajo-core/src/main/antlr4/org/apache/tajo/engine/parser/SQLParser.g4 @@ -295,6 +295,7 @@ nonreserved_keywords | QUARTER | RANGE | RANK + | RECORD | REGEXP | RENAME | RESET @@ -309,7 +310,6 @@ nonreserved_keywords | SIMILAR | STDDEV_POP | STDDEV_SAMP - | STRUCT | SUBPARTITION | SUM | TABLESPACE @@ -431,7 +431,7 @@ predefined_type | bit_type | binary_type | network_type - | struct_type + | record_type ; character_string_type @@ -527,8 +527,8 @@ network_type : INET4 ; -struct_type - : STRUCT table_elements +record_type + : RECORD table_elements ; /* diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/parser/SQLAnalyzer.java b/tajo-core/src/main/java/org/apache/tajo/engine/parser/SQLAnalyzer.java index cbac0e48de..5b4054f101 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/parser/SQLAnalyzer.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/parser/SQLAnalyzer.java @@ -1540,8 +1540,8 @@ public DataTypeExpr visitData_type(SQLParser.Data_typeContext ctx) { typeDefinition = new DataTypeExpr(Type.INET4.name()); - } else if (checkIfExist(predefined_type.struct_type())) { - ColumnDefinition [] nestedRecordDefines = getDefinitions(predefined_type.struct_type().table_elements()); + } else if (checkIfExist(predefined_type.record_type())) { + ColumnDefinition [] nestedRecordDefines = getDefinitions(predefined_type.record_type().table_elements()); typeDefinition = new DataTypeExpr(nestedRecordDefines); } diff --git a/tajo-core/src/test/resources/queries/TestSQLAnalyzer/create_table_nested_1.sql b/tajo-core/src/test/resources/queries/TestSQLAnalyzer/create_table_nested_1.sql index 3942171f3e..bdf76eb95a 100644 --- a/tajo-core/src/test/resources/queries/TestSQLAnalyzer/create_table_nested_1.sql +++ b/tajo-core/src/test/resources/queries/TestSQLAnalyzer/create_table_nested_1.sql @@ -1 +1 @@ -CREATE TABLE T1 (A TEXT, B INT4, C STRUCT (D TEXT, E INT8), F FLOAT8); \ No newline at end of file +CREATE TABLE T1 (A TEXT, B INT4, C RECORD (D TEXT, E INT8), F FLOAT8); \ No newline at end of file diff --git a/tajo-core/src/test/resources/queries/TestSQLAnalyzer/create_table_nested_2.sql b/tajo-core/src/test/resources/queries/TestSQLAnalyzer/create_table_nested_2.sql index 9eaa00717a..0bfdc11d69 100644 --- a/tajo-core/src/test/resources/queries/TestSQLAnalyzer/create_table_nested_2.sql +++ b/tajo-core/src/test/resources/queries/TestSQLAnalyzer/create_table_nested_2.sql @@ -1 +1 @@ -CREATE TABLE T1 (A TEXT, B INT4, C STRUCT (D TEXT, E INT8, F STRUCT (G INT1, H FLOAT4)), Z FLOAT8); \ No newline at end of file +CREATE TABLE T1 (A TEXT, B INT4, C RECORD (D TEXT, E INT8, F RECORD (G INT1, H FLOAT4)), Z FLOAT8); \ No newline at end of file diff --git a/tajo-core/src/test/resources/results/TestSQLAnalyzer/create_table_nested_1.result b/tajo-core/src/test/resources/results/TestSQLAnalyzer/create_table_nested_1.result index 4223a88663..d609790c60 100644 --- a/tajo-core/src/test/resources/results/TestSQLAnalyzer/create_table_nested_1.result +++ b/tajo-core/src/test/resources/results/TestSQLAnalyzer/create_table_nested_1.result @@ -14,8 +14,8 @@ }, { "ColumnDefName": "c", - "DataTypeName": "STRUCT", - "Struct": [ + "DataTypeName": "RECORD", + "Record": [ { "ColumnDefName": "d", "DataTypeName": "TEXT", diff --git a/tajo-core/src/test/resources/results/TestSQLAnalyzer/create_table_nested_2.result b/tajo-core/src/test/resources/results/TestSQLAnalyzer/create_table_nested_2.result index af60276aa5..4c4b34328b 100644 --- a/tajo-core/src/test/resources/results/TestSQLAnalyzer/create_table_nested_2.result +++ b/tajo-core/src/test/resources/results/TestSQLAnalyzer/create_table_nested_2.result @@ -14,8 +14,8 @@ }, { "ColumnDefName": "c", - "DataTypeName": "STRUCT", - "Struct": [ + "DataTypeName": "RECORD", + "Record": [ { "ColumnDefName": "d", "DataTypeName": "TEXT", @@ -28,8 +28,8 @@ }, { "ColumnDefName": "f", - "DataTypeName": "STRUCT", - "Struct": [ + "DataTypeName": "RECORD", + "Record": [ { "ColumnDefName": "g", "DataTypeName": "INT1", From bb39a913b8ac8230eb97d65858cf93f95ffd68fc Mon Sep 17 00:00:00 2001 From: Hyunsik Choi Date: Fri, 20 Feb 2015 23:47:33 -0800 Subject: [PATCH 03/12] Introduced TypeDesc which contains DataType and NestedSchema. --- .../apache/tajo/algebra/ColumnDefinition.java | 2 +- .../org/apache/tajo/algebra/DataTypeExpr.java | 15 ++-- .../java/org/apache/tajo/catalog/Column.java | 32 +++++---- .../org/apache/tajo/catalog/TypeDesc.java | 72 +++++++++++++++++++ tajo-common/src/main/proto/DataTypes.proto | 8 +++ .../org/apache/tajo/plan/ExprAnnotator.java | 5 +- .../org/apache/tajo/plan/LogicalPlanner.java | 18 +++-- .../org/apache/tajo/plan/TypeDeterminant.java | 4 +- 8 files changed, 128 insertions(+), 28 deletions(-) create mode 100644 tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/TypeDesc.java diff --git a/tajo-algebra/src/main/java/org/apache/tajo/algebra/ColumnDefinition.java b/tajo-algebra/src/main/java/org/apache/tajo/algebra/ColumnDefinition.java index 80ecac4474..f8ea0f1de9 100644 --- a/tajo-algebra/src/main/java/org/apache/tajo/algebra/ColumnDefinition.java +++ b/tajo-algebra/src/main/java/org/apache/tajo/algebra/ColumnDefinition.java @@ -44,7 +44,7 @@ public ColumnDefinition(String columnName, DataTypeExpr dataType) { // nested records if (dataType.isNestedRecordType()) { - this.nestedRecord = dataType.nestedRecord; + this.nestedRecordTypes = dataType.nestedRecordTypes; } } diff --git a/tajo-algebra/src/main/java/org/apache/tajo/algebra/DataTypeExpr.java b/tajo-algebra/src/main/java/org/apache/tajo/algebra/DataTypeExpr.java index 28f9a5e0da..96c9adbd7a 100644 --- a/tajo-algebra/src/main/java/org/apache/tajo/algebra/DataTypeExpr.java +++ b/tajo-algebra/src/main/java/org/apache/tajo/algebra/DataTypeExpr.java @@ -21,7 +21,6 @@ import com.google.common.base.Objects; import com.google.gson.annotations.Expose; import com.google.gson.annotations.SerializedName; -import org.apache.tajo.common.TajoDataTypes; import org.apache.tajo.common.TajoDataTypes.Type; import org.apache.tajo.util.TUtil; @@ -33,19 +32,19 @@ public class DataTypeExpr extends Expr { @Expose @SerializedName("Scale") Integer scale; @Expose @SerializedName("Record") - DataTypeExpr [] nestedRecord; + ColumnDefinition [] nestedRecordTypes; public DataTypeExpr(String typeName) { super(OpType.DataType); this.typeName = typeName; } - public DataTypeExpr(DataTypeExpr [] nestedRecordTypes) { + public DataTypeExpr(ColumnDefinition [] nestedRecordTypes) { super(OpType.DataType); // Please refer to DataTypes.proto. 'STRUCT' must be equivalent to Enum type in DataTypes.proto. // STRUCT = 51; this.typeName = Type.RECORD.name(); - this.nestedRecord = nestedRecordTypes; + this.nestedRecordTypes = nestedRecordTypes; } public String getTypeName() { @@ -56,6 +55,10 @@ public boolean isNestedRecordType() { return this.typeName.equals(Type.RECORD.name()); } + public ColumnDefinition [] getNestedRecordTypes() { + return nestedRecordTypes; + } + public boolean hasLengthOrPrecision() { return lengthOrPrecision != null; } @@ -91,7 +94,7 @@ boolean equalsTo(Expr expr) { return typeName.equals(another.typeName) && TUtil.checkEquals(lengthOrPrecision, another.lengthOrPrecision) && TUtil.checkEquals(scale, another.scale) && - TUtil.checkEquals(nestedRecord, another.nestedRecord); + TUtil.checkEquals(nestedRecordTypes, another.nestedRecordTypes); } @Override @@ -100,7 +103,7 @@ public Object clone() throws CloneNotSupportedException { dataType.typeName = typeName; dataType.lengthOrPrecision = lengthOrPrecision; dataType.scale = scale; - dataType.nestedRecord = nestedRecord; + dataType.nestedRecordTypes = nestedRecordTypes; return dataType; } } diff --git a/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/Column.java b/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/Column.java index aceb6f1383..701a8f0199 100644 --- a/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/Column.java +++ b/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/Column.java @@ -32,7 +32,18 @@ */ public class Column implements ProtoObject, GsonObject { @Expose protected String name; - @Expose protected DataType dataType; + @Expose protected TypeDesc typeDesc; + + /** + * Column Constructor + * + * @param name field name + * @param typeDesc Type description + */ + public Column(String name, TypeDesc typeDesc) { + this.name = name; + this.typeDesc = typeDesc; + } /** * @@ -40,8 +51,7 @@ public class Column implements ProtoObject, GsonObject { * @param dataType Data Type with length */ public Column(String name, DataType dataType) { - this.name = name; - this.dataType = dataType; + this(name, new TypeDesc(dataType)); } /** @@ -65,7 +75,7 @@ public Column(String name, TajoDataTypes.Type type, int typeLength) { public Column(ColumnProto proto) { name = proto.getName(); - dataType = proto.getDataType(); + typeDesc = new TypeDesc(proto.getDataType()); } /** @@ -105,20 +115,20 @@ public String getSimpleName() { * @return DataType which includes domain type and scale. */ public DataType getDataType() { - return this.dataType; + return this.typeDesc.dataType; } @Override public boolean equals(Object o) { if (o instanceof Column) { Column another = (Column)o; - return name.equals(another.name) && dataType.equals(another.dataType); + return name.equals(another.name) && typeDesc.equals(another.typeDesc); } return false; } public int hashCode() { - return Objects.hashCode(name, dataType); + return Objects.hashCode(name, typeDesc); } @@ -128,16 +138,12 @@ public int hashCode() { */ @Override public ColumnProto getProto() { - return ColumnProto.newBuilder().setName(this.name).setDataType(this.dataType).build(); + return ColumnProto.newBuilder().setName(this.name).setDataType(this.typeDesc.getDataType()).build(); } public String toString() { StringBuilder sb = new StringBuilder(getQualifiedName()); - sb.append(" (").append(getDataType().getType()); - if (getDataType().getLength() > 0) { - sb.append("(" + getDataType().getLength() + ")"); - } - sb.append(")"); + sb.append(" (").append(typeDesc.toString()).append(")"); return sb.toString(); } diff --git a/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/TypeDesc.java b/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/TypeDesc.java new file mode 100644 index 0000000000..d82874b571 --- /dev/null +++ b/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/TypeDesc.java @@ -0,0 +1,72 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tajo.catalog; + +import com.google.common.base.Objects; +import com.google.gson.annotations.Expose; +import org.apache.tajo.common.TajoDataTypes.DataType; +import org.apache.tajo.common.TajoDataTypes.Type; +import org.apache.tajo.util.TUtil; + +public class TypeDesc { + @Expose protected DataType dataType; + @Expose protected Schema nestedRecordSchema; // NULL unless type is RECORD. + + public TypeDesc(DataType dataType) { + this.dataType = dataType; + } + + public TypeDesc(Schema recordSchema) { + this.dataType = CatalogUtil.newSimpleDataType(Type.RECORD); + this.nestedRecordSchema = recordSchema; + } + + public DataType getDataType() { + return dataType; + } + + public boolean equals(Object obj) { + if (obj instanceof TypeDesc) { + TypeDesc other = (TypeDesc) obj; + return this.dataType.equals(other.dataType) && + TUtil.checkEquals(nestedRecordSchema, other.nestedRecordSchema); + + } else { + return false; + } + } + + public int hashCode() { + return Objects.hashCode(dataType.hashCode(), nestedRecordSchema); + } + + public String toString() { + StringBuilder sb = new StringBuilder(); + + if (dataType.getType() == Type.RECORD) { + sb.append("RECORD (").append(nestedRecordSchema.toString()).append(")"); + } else { + sb.append(dataType.getType().name()); + if (dataType.getLength() > 0) { + sb.append("(" + dataType.getLength() + ")"); + } + } + return sb.toString(); + } +} diff --git a/tajo-common/src/main/proto/DataTypes.proto b/tajo-common/src/main/proto/DataTypes.proto index 04f1e12439..73948f18b8 100644 --- a/tajo-common/src/main/proto/DataTypes.proto +++ b/tajo-common/src/main/proto/DataTypes.proto @@ -102,4 +102,12 @@ message DataType { required Type type = 1; optional int32 length = 2; optional string code = 3; + + /** + * Nested fields. Since Protobuf does not support nested fields, + * the nesting is flattened to a single list by a depth-first traversal. + * The children count is used to construct the nested relationship. + * This field is not set when the element is a primitive type + */ + optional int32 num_children = 4; } diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/ExprAnnotator.java b/tajo-plan/src/main/java/org/apache/tajo/plan/ExprAnnotator.java index 235bebf25e..5166e80315 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/ExprAnnotator.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/ExprAnnotator.java @@ -793,10 +793,11 @@ public EvalNode visitCastExpr(Context ctx, Stack stack, CastExpr expr) thr } return new ConstEval( - DatumFactory.cast(constEval.getValue(), LogicalPlanner.convertDataType(expr.getTarget()), tz)); + DatumFactory.cast(constEval.getValue(), + LogicalPlanner.convertDataType(expr.getTarget()).getDataType(), tz)); } else { - return new CastEval(ctx.queryContext, child, LogicalPlanner.convertDataType(expr.getTarget())); + return new CastEval(ctx.queryContext, child, LogicalPlanner.convertDataType(expr.getTarget()).getDataType()); } } diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/LogicalPlanner.java b/tajo-plan/src/main/java/org/apache/tajo/plan/LogicalPlanner.java index babcb1ef24..3a6c57c441 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/LogicalPlanner.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/LogicalPlanner.java @@ -1860,7 +1860,7 @@ private Schema convertColumnsToSchema(ColumnDefinition[] elements) { * @param elements to be transformed * @return schema transformed from table definition elements */ - private Schema convertTableElementsSchema(ColumnDefinition[] elements) { + private static Schema convertTableElementsSchema(ColumnDefinition[] elements) { Schema schema = new Schema(); for (ColumnDefinition columnDefinition: elements) { @@ -1870,19 +1870,29 @@ private Schema convertTableElementsSchema(ColumnDefinition[] elements) { return schema; } - private Column convertColumn(ColumnDefinition columnDefinition) { + private static Column convertColumn(ColumnDefinition columnDefinition) { return new Column(columnDefinition.getColumnName(), convertDataType(columnDefinition)); } - public static TajoDataTypes.DataType convertDataType(DataTypeExpr dataType) { + public static TypeDesc convertDataType(DataTypeExpr dataType) { TajoDataTypes.Type type = TajoDataTypes.Type.valueOf(dataType.getTypeName()); TajoDataTypes.DataType.Builder builder = TajoDataTypes.DataType.newBuilder(); builder.setType(type); + if (dataType.hasLengthOrPrecision()) { builder.setLength(dataType.getLengthOrPrecision()); } - return builder.build(); + + TypeDesc typeDesc; + if (type == TajoDataTypes.Type.RECORD) { + Schema nestedRecordSchema = convertTableElementsSchema(dataType.getNestedRecordTypes()); + typeDesc = new TypeDesc(nestedRecordSchema); + } else { + typeDesc = new TypeDesc(builder.build()); + } + + return typeDesc; } diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/TypeDeterminant.java b/tajo-plan/src/main/java/org/apache/tajo/plan/TypeDeterminant.java index 6222734105..7c468bb385 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/TypeDeterminant.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/TypeDeterminant.java @@ -59,7 +59,7 @@ public DataType visitUnaryOperator(LogicalPlanner.PlanContext ctx, Stack s dataType = BOOL_TYPE; break; case Cast: - dataType = LogicalPlanner.convertDataType(((CastExpr)expr).getTarget()); + dataType = LogicalPlanner.convertDataType(((CastExpr)expr).getTarget()).getDataType(); break; default: dataType = visit(ctx, stack, expr.getChild()); @@ -270,7 +270,7 @@ public DataType visitWindowFunction(LogicalPlanner.PlanContext ctx, Stack @Override public DataType visitDataType(LogicalPlanner.PlanContext ctx, Stack stack, DataTypeExpr expr) throws PlanningException { - return LogicalPlanner.convertDataType(expr); + return LogicalPlanner.convertDataType(expr).getDataType(); } @Override From a21c110c6a48f129d9c56c8680db5eea593d9076 Mon Sep 17 00:00:00 2001 From: Hyunsik Choi Date: Sun, 22 Feb 2015 15:48:15 -0800 Subject: [PATCH 04/12] Introduced nested record to schema almostly. --- .../org/apache/tajo/catalog/CatalogUtil.java | 7 + .../java/org/apache/tajo/catalog/Column.java | 6 +- .../java/org/apache/tajo/catalog/Schema.java | 140 ++++++++++++------ .../org/apache/tajo/catalog/SchemaUtil.java | 30 ++++ .../org/apache/tajo/catalog/TestSchema.java | 58 ++++++++ .../dictionary/ColumnsTableDescriptor.java | 1 + .../tajo/catalog/store/AbstractDBStore.java | 62 +++++--- .../main/resources/schemas/derby/derby.xml | 1 + .../resources/schemas/mariadb/columns.sql | 1 + .../main/resources/schemas/mysql/columns.sql | 1 + .../main/resources/schemas/oracle/columns.sql | 1 + .../resources/schemas/postgresql/columns.sql | 1 + .../org/apache/tajo/catalog/TestCatalog.java | 34 +++++ tajo-common/src/main/proto/DataTypes.proto | 16 +- .../tajo/engine/query/TestCreateTable.java | 12 ++ .../TestSelectQuery/testExplainSelect.result | 4 +- 16 files changed, 299 insertions(+), 76 deletions(-) diff --git a/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/CatalogUtil.java b/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/CatalogUtil.java index 45609d0c87..b919f56c68 100644 --- a/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/CatalogUtil.java +++ b/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/CatalogUtil.java @@ -354,6 +354,13 @@ public static DataType newSimpleDataType(Type type) { return DataType.newBuilder().setType(type).build(); } + public static DataType newRecordType(int nestedFieldNum) { + DataType.Builder builder = DataType.newBuilder(); + builder.setType(Type.RECORD); + builder.setNumChildren(nestedFieldNum); + return builder.build(); + } + public static DataType [] newSimpleDataTypeArray(Type... types) { DataType [] dataTypes = new DataType[types.length]; for (int i = 0; i < types.length; i++) { diff --git a/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/Column.java b/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/Column.java index 701a8f0199..5fc87de10b 100644 --- a/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/Column.java +++ b/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/Column.java @@ -138,7 +138,11 @@ public int hashCode() { */ @Override public ColumnProto getProto() { - return ColumnProto.newBuilder().setName(this.name).setDataType(this.typeDesc.getDataType()).build(); + ColumnProto.Builder builder = ColumnProto.newBuilder(); + builder + .setName(this.name) + .setDataType(this.typeDesc.getDataType()); + return builder.build(); } public String toString() { diff --git a/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/Schema.java b/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/Schema.java index 71c1b01dc2..44a3e45d3f 100644 --- a/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/Schema.java +++ b/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/Schema.java @@ -23,6 +23,7 @@ import com.google.gson.annotations.Expose; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.tajo.catalog.SchemaUtil.ColumnVisitor; import org.apache.tajo.catalog.exception.AlreadyExistsFieldException; import org.apache.tajo.catalog.json.CatalogGsonHelper; import org.apache.tajo.catalog.proto.CatalogProtos.ColumnProto; @@ -47,34 +48,57 @@ public Schema() { } public Schema(SchemaProto proto) { - this.fields = new ArrayList(); - this.fieldsByQualifiedName = new HashMap(); - this.fieldsByName = new HashMap>(); - for(ColumnProto colProto : proto.getFieldsList()) { - Column tobeAdded = new Column(colProto); - fields.add(tobeAdded); - if (tobeAdded.hasQualifier()) { - fieldsByQualifiedName.put(tobeAdded.getQualifier() + "." + tobeAdded.getSimpleName(), fields.size() - 1); - } else { - fieldsByQualifiedName.put(tobeAdded.getSimpleName(), fields.size() - 1); - } - if (fieldsByName.containsKey(tobeAdded.getSimpleName())) { - fieldsByName.get(tobeAdded.getSimpleName()).add(fields.size() - 1); - } else { - fieldsByName.put(tobeAdded.getSimpleName(), TUtil.newList(fields.size() - 1)); - } + init(); + + List toBeAdded = TUtil.newList(); + for (int i = 0; i < proto.getFieldsCount(); i++) { + deserializeColumn(toBeAdded, proto.getFieldsList(), i); + } + + for (Column c : toBeAdded) { + addColumn(c); + } + } + + private static void deserializeColumn(List fields, List protos, int serializedColumnIndex) { + ColumnProto columnProto = protos.get(serializedColumnIndex); + if (columnProto.getDataType().getType() == Type.RECORD) { + + // Get the number of child fields + int childNum = columnProto.getDataType().getNumChildren(); + // where is start index of nested fields? + int childStartIndex = fields.size() - columnProto.getDataType().getNumChildren(); + // Extract nested fields + List nestedColumns = TUtil.newList(fields.subList(childStartIndex, childStartIndex + childNum)); + // Remove nested fields from the the current level + fields.removeAll(nestedColumns); + + // Add the nested fields to the list as a single record column + fields.add(new Column(columnProto.getName(), new TypeDesc(new Schema(nestedColumns)))); + } else { + fields.add(new Column(protos.get(serializedColumnIndex))); } } public Schema(Schema schema) { this(); + this.fields.addAll(schema.fields); this.fieldsByQualifiedName.putAll(schema.fieldsByQualifiedName); this.fieldsByName.putAll(schema.fieldsByName); } - public Schema(Column [] columns) { + public Schema(Column [] columns) { + init(); + + for(Column c : columns) { + addColumn(c); + } + } + + public Schema(Iterable columns) { init(); + for(Column c : columns) { addColumn(c); } @@ -93,21 +117,15 @@ private void init() { * @param qualifier The qualifier */ public void setQualifier(String qualifier) { - Schema copy = null; - try { - copy = (Schema) clone(); - } catch (CloneNotSupportedException e) { - throw new RuntimeException(e); - } + List columns = getColumns(); fields.clear(); fieldsByQualifiedName.clear(); fieldsByName.clear(); Column newColumn; - for (int i = 0; i < copy.size(); i++) { - Column column = copy.getColumn(i); - newColumn = new Column(qualifier + "." + column.getSimpleName(), column.getDataType()); + for (Column c : columns) { + newColumn = new Column(qualifier + "." + c.getSimpleName(), c.typeDesc); addColumn(newColumn); } } @@ -275,6 +293,21 @@ public boolean containsAll(Collection columns) { return fields.containsAll(columns); } + public synchronized Schema addColumn(String name, TypeDesc typeDesc) { + String normalized = name; + if(fieldsByQualifiedName.containsKey(normalized)) { + LOG.error("Already exists column " + normalized); + throw new AlreadyExistsFieldException(normalized); + } + + Column newCol = new Column(normalized, typeDesc); + fields.add(newCol); + fieldsByQualifiedName.put(newCol.getQualifiedName(), fields.size() - 1); + fieldsByName.put(newCol.getSimpleName(), TUtil.newList(fields.size() - 1)); + + return this; + } + public synchronized Schema addColumn(String name, Type type) { if (type == Type.CHAR) { return addColumn(name, CatalogUtil.newDataTypeWithLen(type, 1)); @@ -287,22 +320,13 @@ public synchronized Schema addColumn(String name, Type type, int length) { } public synchronized Schema addColumn(String name, DataType dataType) { - String normalized = name; - if(fieldsByQualifiedName.containsKey(normalized)) { - LOG.error("Already exists column " + normalized); - throw new AlreadyExistsFieldException(normalized); - } - - Column newCol = new Column(normalized, dataType); - fields.add(newCol); - fieldsByQualifiedName.put(newCol.getQualifiedName(), fields.size() - 1); - fieldsByName.put(newCol.getSimpleName(), TUtil.newList(fields.size() - 1)); - + addColumn(name, new TypeDesc(dataType)); + return this; } public synchronized void addColumn(Column column) { - addColumn(column.getQualifiedName(), column.getDataType()); + addColumn(column.getQualifiedName(), column.typeDesc); } public synchronized void addColumns(Schema schema) { @@ -327,10 +351,9 @@ public boolean equals(Object o) { @Override public Object clone() throws CloneNotSupportedException { - Schema schema = null; - - schema = (Schema) super.clone(); + Schema schema = (Schema) super.clone(); schema.init(); + for(Column column: this.fields) { schema.addColumn(column); } @@ -340,15 +363,36 @@ public Object clone() throws CloneNotSupportedException { @Override public SchemaProto getProto() { SchemaProto.Builder builder = SchemaProto.newBuilder(); - builder.clearFields(); - if (this.fields != null) { - for(Column col : fields) { - builder.addFields(col.getProto()); - } - } + SchemaProtoBuilder recursiveBuilder = new SchemaProtoBuilder(builder); + SchemaUtil.visitSchema(this, recursiveBuilder); return builder.build(); } + private static class SchemaProtoBuilder implements ColumnVisitor { + private SchemaProto.Builder builder; + public SchemaProtoBuilder(SchemaProto.Builder builder) { + this.builder = builder; + } + + @Override + public Column visit(int depth, Column column) { + + if (column.getDataType().getType() == Type.RECORD) { + DataType.Builder updatedType = DataType.newBuilder(column.getDataType()); + updatedType.setNumChildren(column.typeDesc.nestedRecordSchema.size()); + + ColumnProto.Builder updatedColumn = ColumnProto.newBuilder(column.getProto()); + updatedColumn.setDataType(updatedType); + + builder.addFields(updatedColumn.build()); + } else { + builder.addFields(column.getProto()); + } + + return column; + } + } + public String toString() { StringBuilder sb = new StringBuilder(); sb.append("{(").append(size()).append(") "); @@ -356,7 +400,7 @@ public String toString() { for(Column col : fields) { sb.append(col); if (i < fields.size() - 1) { - sb.append(","); + sb.append(", "); } i++; } diff --git a/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/SchemaUtil.java b/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/SchemaUtil.java index 23ebe1be88..b1600d6116 100644 --- a/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/SchemaUtil.java +++ b/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/SchemaUtil.java @@ -18,6 +18,11 @@ package org.apache.tajo.catalog; +import org.apache.tajo.catalog.proto.CatalogProtos; +import org.apache.tajo.catalog.proto.CatalogProtos.ColumnProto; + +import java.util.List; + import static org.apache.tajo.common.TajoDataTypes.DataType; import static org.apache.tajo.common.TajoDataTypes.Type; @@ -108,4 +113,29 @@ public static T clone(Schema schema) { } return names; } + + public static interface ColumnVisitor { + public R visit(int depth, Column column); + } + + public static interface ColumnProtoVisitor { + public R visit(int depth, ColumnProto column); + } + + public static void visitSchema(Schema schema, ColumnVisitor function) { + for(Column col : schema.getColumns()) { + visitInDepthFirstOrder(0, function, col); + } + } + + private static void visitInDepthFirstOrder(int depth, ColumnVisitor function, Column column) { + if (column.getDataType().getType() == Type.RECORD) { + for (Column nestedColumn : column.typeDesc.nestedRecordSchema.getColumns()) { + visitInDepthFirstOrder(depth + 1, function, nestedColumn); + } + function.visit(depth, column); + } else { + function.visit(depth, column); + } + } } diff --git a/tajo-catalog/tajo-catalog-common/src/test/java/org/apache/tajo/catalog/TestSchema.java b/tajo-catalog/tajo-catalog-common/src/test/java/org/apache/tajo/catalog/TestSchema.java index a61b4226a3..4d6b78d00f 100644 --- a/tajo-catalog/tajo-catalog-common/src/test/java/org/apache/tajo/catalog/TestSchema.java +++ b/tajo-catalog/tajo-catalog-common/src/test/java/org/apache/tajo/catalog/TestSchema.java @@ -167,6 +167,64 @@ public final void testSetQualifier() { assertEquals(schema3.getColumn(0), schema3.getColumn("tb2.col1")); assertEquals(schema3.getColumn(1), schema3.getColumn("col2")); assertEquals(schema3.getColumn(1), schema3.getColumn("tb2.col2")); + } + + @Test + public void testNestedRecord1() { + Schema s1 = new Schema(); + s1.addColumn("s1", Type.INT8); + + Schema nestedRecordSchema = new Schema(); + nestedRecordSchema.addColumn("s2", Type.FLOAT4); + nestedRecordSchema.addColumn("s3", Type.TEXT); + + Column nestedField = new Column("nestedField", new TypeDesc(nestedRecordSchema)); + s1.addColumn(nestedField); + + s1.addColumn("s4", Type.FLOAT8); + + verifySchema(s1); + } + + @Test + public void testNestedRecord2() { + // for two level nested schema and the same column names + + Schema schema = new Schema(); + schema.addColumn("s1", Type.INT8); + + Schema nestedRecordSchema1 = new Schema(); + nestedRecordSchema1.addColumn("s2", Type.FLOAT4); + nestedRecordSchema1.addColumn("s3", Type.TEXT); + + Schema nestedRecordSchema2 = new Schema(); + nestedRecordSchema2.addColumn("s2", Type.FLOAT4); + nestedRecordSchema2.addColumn("s3", Type.TEXT); + + Column nestedField1 = new Column("nestedField1", new TypeDesc(nestedRecordSchema1)); + schema.addColumn(nestedField1); + + schema.addColumn("s4", Type.FLOAT8); + + Column nestedField2 = new Column("nestedField2", new TypeDesc(nestedRecordSchema2)); + schema.addColumn(nestedField2); + + verifySchema(schema); + } + + public static void verifySchema(Schema s1) { + assertEquals(s1, s1); + + SchemaProto proto = s1.getProto(); + assertEquals("Proto (de)serialized schema is different from the original: ", s1, new Schema(proto)); + + Schema cloned = null; + try { + cloned = (Schema) s1.clone(); + } catch (CloneNotSupportedException e) { + fail("Clone is failed"); + } + assertEquals("Cloned schema is different from the original one:", s1, cloned); } } diff --git a/tajo-catalog/tajo-catalog-server/src/main/java/org/apache/tajo/catalog/dictionary/ColumnsTableDescriptor.java b/tajo-catalog/tajo-catalog-server/src/main/java/org/apache/tajo/catalog/dictionary/ColumnsTableDescriptor.java index 85b8f2086a..f024175c86 100644 --- a/tajo-catalog/tajo-catalog-server/src/main/java/org/apache/tajo/catalog/dictionary/ColumnsTableDescriptor.java +++ b/tajo-catalog/tajo-catalog-server/src/main/java/org/apache/tajo/catalog/dictionary/ColumnsTableDescriptor.java @@ -27,6 +27,7 @@ class ColumnsTableDescriptor extends AbstractTableDescriptor { new ColumnDescriptor("tid", Type.INT4, 0), new ColumnDescriptor("column_name", Type.TEXT, 0), new ColumnDescriptor("ordinal_position", Type.INT4, 0), + new ColumnDescriptor("nested_field_num", Type.INT4, 0), new ColumnDescriptor("data_type", Type.TEXT, 0), new ColumnDescriptor("type_length", Type.INT4, 0) }; diff --git a/tajo-catalog/tajo-catalog-server/src/main/java/org/apache/tajo/catalog/store/AbstractDBStore.java b/tajo-catalog/tajo-catalog-server/src/main/java/org/apache/tajo/catalog/store/AbstractDBStore.java index 04694b0aa0..dad482ffc5 100644 --- a/tajo-catalog/tajo-catalog-server/src/main/java/org/apache/tajo/catalog/store/AbstractDBStore.java +++ b/tajo-catalog/tajo-catalog-server/src/main/java/org/apache/tajo/catalog/store/AbstractDBStore.java @@ -33,6 +33,7 @@ import org.apache.tajo.catalog.exception.*; import org.apache.tajo.catalog.proto.CatalogProtos; import org.apache.tajo.catalog.proto.CatalogProtos.*; +import org.apache.tajo.common.TajoDataTypes; import org.apache.tajo.common.TajoDataTypes.Type; import org.apache.tajo.exception.InternalException; import org.apache.tajo.exception.UnimplementedException; @@ -800,7 +801,9 @@ public void createTable(final CatalogProtos.TableDescProto table) throws Catalog pstmt.close(); String colSql = - "INSERT INTO " + TB_COLUMNS + " (TID, COLUMN_NAME, ORDINAL_POSITION, DATA_TYPE, TYPE_LENGTH) VALUES(?, ?, ?, ?, ?) "; + "INSERT INTO " + TB_COLUMNS + + // 1 2 3 4 5 6 + " (TID, COLUMN_NAME, ORDINAL_POSITION, NESTED_FIELD_NUM, DATA_TYPE, TYPE_LENGTH) VALUES(?, ?, ?, ?, ?, ?) "; if (LOG.isDebugEnabled()) { LOG.debug(colSql); @@ -809,11 +812,14 @@ public void createTable(final CatalogProtos.TableDescProto table) throws Catalog pstmt = conn.prepareStatement(colSql); for (int i = 0; i < table.getSchema().getFieldsCount(); i++) { ColumnProto col = table.getSchema().getFields(i); + TajoDataTypes.DataType dataType = col.getDataType(); + pstmt.setInt(1, tableId); pstmt.setString(2, CatalogUtil.extractSimpleName(col.getName())); pstmt.setInt(3, i); - pstmt.setString(4, col.getDataType().getType().name()); - pstmt.setInt(5, (col.getDataType().hasLength() ? col.getDataType().getLength() : 0)); + pstmt.setInt(4, dataType.hasNumChildren() ? dataType.getNumChildren() : 0); + pstmt.setString(5, dataType.getType().name()); + pstmt.setInt(6, (col.getDataType().hasLength() ? col.getDataType().getLength() : 0)); pstmt.addBatch(); pstmt.clearParameters(); } @@ -1028,13 +1034,13 @@ private void renameColumn(final int tableId, final CatalogProtos.AlterColumnProt throws CatalogException { final String selectColumnSql = - "SELECT COLUMN_NAME, DATA_TYPE, TYPE_LENGTH, ORDINAL_POSITION from " + TB_COLUMNS + + "SELECT COLUMN_NAME, DATA_TYPE, TYPE_LENGTH, ORDINAL_POSITION, NESTED_FIELD_NUM from " + TB_COLUMNS + " WHERE " + COL_TABLES_PK + " = ?" + " AND COLUMN_NAME = ?" ; final String deleteColumnNameSql = "DELETE FROM " + TB_COLUMNS + " WHERE TID = ? AND COLUMN_NAME = ?"; final String insertNewColumnSql = "INSERT INTO " + TB_COLUMNS + - " (TID, COLUMN_NAME, ORDINAL_POSITION, DATA_TYPE, TYPE_LENGTH) VALUES(?, ?, ?, ?, ?) "; + " (TID, COLUMN_NAME, ORDINAL_POSITION, NESTED_FIELD_NUM, DATA_TYPE, TYPE_LENGTH) VALUES(?, ?, ?, ?, ?, ?) "; if (LOG.isDebugEnabled()) { LOG.debug(selectColumnSql); @@ -1058,13 +1064,15 @@ private void renameColumn(final int tableId, final CatalogProtos.AlterColumnProt resultSet = pstmt.executeQuery(); CatalogProtos.ColumnProto columnProto = null; - int ordinalPosition = -1; + int ordinalPosition = 0; + int nestedFieldNum = 0; if (resultSet.next()) { columnProto = resultToColumnProto(resultSet); //NOTE ==> Setting new column Name columnProto = columnProto.toBuilder().setName(alterColumnProto.getNewColumnName()).build(); ordinalPosition = resultSet.getInt("ORDINAL_POSITION"); + nestedFieldNum = resultSet.getInt("NESTED_FIELD_NUM"); } else { throw new NoSuchColumnException(alterColumnProto.getOldColumnName()); } @@ -1085,8 +1093,9 @@ private void renameColumn(final int tableId, final CatalogProtos.AlterColumnProt pstmt.setInt(1, tableId); pstmt.setString(2, CatalogUtil.extractSimpleName(columnProto.getName())); pstmt.setInt(3, ordinalPosition); - pstmt.setString(4, columnProto.getDataType().getType().name()); - pstmt.setInt(5, (columnProto.getDataType().hasLength() ? columnProto.getDataType().getLength() : 0)); + pstmt.setInt(4, nestedFieldNum); + pstmt.setString(5, columnProto.getDataType().getType().name()); + pstmt.setInt(6, (columnProto.getDataType().hasLength() ? columnProto.getDataType().getLength() : 0)); pstmt.executeUpdate(); conn.commit(); @@ -1101,8 +1110,12 @@ private void renameColumn(final int tableId, final CatalogProtos.AlterColumnProt private void addNewColumn(int tableId, CatalogProtos.ColumnProto columnProto) throws CatalogException { - final String insertNewColumnSql = "INSERT INTO " + TB_COLUMNS + " (TID, COLUMN_NAME, ORDINAL_POSITION, DATA_TYPE, TYPE_LENGTH) VALUES(?, ?, ?, ?, ?) "; - final String columnCountSql = "SELECT COLUMN_NAME, MAX(ORDINAL_POSITION) AS POSITION FROM " + TB_COLUMNS + " WHERE TID = ? GROUP BY COLUMN_NAME"; + final String insertNewColumnSql = + "INSERT INTO " + TB_COLUMNS + + " (TID, COLUMN_NAME, ORDINAL_POSITION, NESTED_FIELD_NUM, DATA_TYPE, TYPE_LENGTH) VALUES(?, ?, ?, ?, ?, ?) "; + final String columnCountSql = + "SELECT COLUMN_NAME, MAX(ORDINAL_POSITION) AS POSITION FROM " + TB_COLUMNS + + " WHERE TID = ? GROUP BY COLUMN_NAME"; if (LOG.isDebugEnabled()) { LOG.debug(insertNewColumnSql); @@ -1125,12 +1138,15 @@ private void addNewColumn(int tableId, CatalogProtos.ColumnProto columnProto) th pstmt.close(); resultSet = null; + TajoDataTypes.DataType dataType = columnProto.getDataType(); + pstmt = conn.prepareStatement(insertNewColumnSql); pstmt.setInt(1, tableId); pstmt.setString(2, CatalogUtil.extractSimpleName(columnProto.getName())); pstmt.setInt(3, position + 1); - pstmt.setString(4, columnProto.getDataType().getType().name()); - pstmt.setInt(5, (columnProto.getDataType().hasLength() ? columnProto.getDataType().getLength() : 0)); + pstmt.setInt(4, dataType.hasNumChildren() ? dataType.getNumChildren() : 0); + pstmt.setString(5, dataType.getType().name()); + pstmt.setInt(6, (columnProto.getDataType().hasLength() ? columnProto.getDataType().getLength() : 0)); pstmt.executeUpdate(); } catch (SQLException sqlException) { @@ -1385,7 +1401,7 @@ public CatalogProtos.TableDescProto getTable(String databaseName, String tableNa // Geting Column Descriptions ////////////////////////////////////////// CatalogProtos.SchemaProto.Builder schemaBuilder = CatalogProtos.SchemaProto.newBuilder(); - sql = "SELECT COLUMN_NAME, DATA_TYPE, TYPE_LENGTH from " + TB_COLUMNS + + sql = "SELECT COLUMN_NAME, NESTED_FIELD_NUM, DATA_TYPE, TYPE_LENGTH from " + TB_COLUMNS + " WHERE " + COL_TABLES_PK + " = ? ORDER BY ORDINAL_POSITION ASC"; if (LOG.isDebugEnabled()) { @@ -1643,7 +1659,8 @@ public List getAllColumns() throws CatalogException { List columns = new ArrayList(); try { - String sql = "SELECT TID, COLUMN_NAME, ORDINAL_POSITION, DATA_TYPE, TYPE_LENGTH FROM " + TB_COLUMNS + + String sql = + "SELECT TID, COLUMN_NAME, ORDINAL_POSITION, NESTED_FIELD_NUM, DATA_TYPE, TYPE_LENGTH FROM " + TB_COLUMNS + " ORDER BY TID ASC, ORDINAL_POSITION ASC"; conn = getConnection(); @@ -1654,11 +1671,15 @@ public List getAllColumns() throws CatalogException { builder.setTid(resultSet.getInt("TID")); builder.setName(resultSet.getString("COLUMN_NAME")); - + + int nestedFieldNum = resultSet.getInt("NESTED_FIELD_NUM"); + Type type = getDataType(resultSet.getString("DATA_TYPE").trim()); int typeLength = resultSet.getInt("TYPE_LENGTH"); - - if (typeLength > 0) { + + if (nestedFieldNum > 0) { + builder.setDataType(CatalogUtil.newRecordType(nestedFieldNum)); + } else if (typeLength > 0) { builder.setDataType(CatalogUtil.newDataTypeWithLen(type, typeLength)); } else { builder.setDataType(CatalogUtil.newSimpleDataType(type)); @@ -2309,9 +2330,14 @@ private ColumnProto resultToColumnProto(final ResultSet res) throws SQLException ColumnProto.Builder builder = ColumnProto.newBuilder(); builder.setName(res.getString("column_name").trim()); + int nestedFieldNum = res.getInt("NESTED_FIELD_NUM"); + Type type = getDataType(res.getString("data_type").trim()); int typeLength = res.getInt("type_length"); - if (typeLength > 0) { + + if (nestedFieldNum > 0) { + builder.setDataType(CatalogUtil.newRecordType(nestedFieldNum)); + } else if (typeLength > 0) { builder.setDataType(CatalogUtil.newDataTypeWithLen(type, typeLength)); } else { builder.setDataType(CatalogUtil.newSimpleDataType(type)); diff --git a/tajo-catalog/tajo-catalog-server/src/main/resources/schemas/derby/derby.xml b/tajo-catalog/tajo-catalog-server/src/main/resources/schemas/derby/derby.xml index db2473be5a..9a271e66f6 100644 --- a/tajo-catalog/tajo-catalog-server/src/main/resources/schemas/derby/derby.xml +++ b/tajo-catalog/tajo-catalog-server/src/main/resources/schemas/derby/derby.xml @@ -72,6 +72,7 @@ TID INT NOT NULL REFERENCES TABLES (TID) ON DELETE CASCADE, COLUMN_NAME VARCHAR(128) NOT NULL, ORDINAL_POSITION INTEGER NOT NULL, + NESTED_FIELD_NUM INTEGER NOT NULL, DATA_TYPE CHAR(16), TYPE_LENGTH INTEGER, CONSTRAINT COLUMNS_PK PRIMARY KEY (TID, COLUMN_NAME) diff --git a/tajo-catalog/tajo-catalog-server/src/main/resources/schemas/mariadb/columns.sql b/tajo-catalog/tajo-catalog-server/src/main/resources/schemas/mariadb/columns.sql index aec1553843..16eed0212d 100644 --- a/tajo-catalog/tajo-catalog-server/src/main/resources/schemas/mariadb/columns.sql +++ b/tajo-catalog/tajo-catalog-server/src/main/resources/schemas/mariadb/columns.sql @@ -2,6 +2,7 @@ CREATE TABLE COLUMNS ( TID INT NOT NULL, COLUMN_NAME VARCHAR(255) BINARY NOT NULL, ORDINAL_POSITION INT NOT NULL, + NESTED_FIELD_NUM INT NOT NULL, DATA_TYPE CHAR(16), TYPE_LENGTH INTEGER, PRIMARY KEY (TID, COLUMN_NAME), diff --git a/tajo-catalog/tajo-catalog-server/src/main/resources/schemas/mysql/columns.sql b/tajo-catalog/tajo-catalog-server/src/main/resources/schemas/mysql/columns.sql index aec1553843..16eed0212d 100644 --- a/tajo-catalog/tajo-catalog-server/src/main/resources/schemas/mysql/columns.sql +++ b/tajo-catalog/tajo-catalog-server/src/main/resources/schemas/mysql/columns.sql @@ -2,6 +2,7 @@ CREATE TABLE COLUMNS ( TID INT NOT NULL, COLUMN_NAME VARCHAR(255) BINARY NOT NULL, ORDINAL_POSITION INT NOT NULL, + NESTED_FIELD_NUM INT NOT NULL, DATA_TYPE CHAR(16), TYPE_LENGTH INTEGER, PRIMARY KEY (TID, COLUMN_NAME), diff --git a/tajo-catalog/tajo-catalog-server/src/main/resources/schemas/oracle/columns.sql b/tajo-catalog/tajo-catalog-server/src/main/resources/schemas/oracle/columns.sql index 71943c0588..9a5e94018a 100644 --- a/tajo-catalog/tajo-catalog-server/src/main/resources/schemas/oracle/columns.sql +++ b/tajo-catalog/tajo-catalog-server/src/main/resources/schemas/oracle/columns.sql @@ -2,6 +2,7 @@ CREATE TABLE COLUMNS ( TID INT NOT NULL, COLUMN_NAME VARCHAR2(255) NOT NULL, ORDINAL_POSITION INT NOT NULL, + NESTED_FIELD_NUM INT NOT NULL, DATA_TYPE CHAR(16), TYPE_LENGTH INTEGER, CONSTRAINT COLUMNS_PKEY PRIMARY KEY (TID, COLUMN_NAME), diff --git a/tajo-catalog/tajo-catalog-server/src/main/resources/schemas/postgresql/columns.sql b/tajo-catalog/tajo-catalog-server/src/main/resources/schemas/postgresql/columns.sql index 425be9606b..c490970401 100644 --- a/tajo-catalog/tajo-catalog-server/src/main/resources/schemas/postgresql/columns.sql +++ b/tajo-catalog/tajo-catalog-server/src/main/resources/schemas/postgresql/columns.sql @@ -2,6 +2,7 @@ CREATE TABLE COLUMNS ( TID INT NOT NULL, COLUMN_NAME VARCHAR(255) NOT NULL, ORDINAL_POSITION INT NOT NULL, + NESTED_FIELD_NUM INT NOT NULL, DATA_TYPE CHAR(16), TYPE_LENGTH INTEGER, CONSTRAINT COLUMNS_PKEY PRIMARY KEY (TID, COLUMN_NAME), diff --git a/tajo-catalog/tajo-catalog-server/src/test/java/org/apache/tajo/catalog/TestCatalog.java b/tajo-catalog/tajo-catalog-server/src/test/java/org/apache/tajo/catalog/TestCatalog.java index 305742f9bb..79e802fe44 100644 --- a/tajo-catalog/tajo-catalog-server/src/test/java/org/apache/tajo/catalog/TestCatalog.java +++ b/tajo-catalog/tajo-catalog-server/src/test/java/org/apache/tajo/catalog/TestCatalog.java @@ -431,6 +431,40 @@ public void testGetTable() throws Exception { assertFalse(catalog.existsTable(DEFAULT_DATABASE_NAME, "getTable")); } + @Test + public void testCreateAndGetNestedTable1() throws Exception { + // schema creation + Schema schema = new Schema(); + schema.addColumn("s1", Type.INT8); + Schema nestedRecordSchema = new Schema(); + nestedRecordSchema.addColumn("s2", Type.FLOAT4); + nestedRecordSchema.addColumn("s3", Type.TEXT); + Column nestedField = new Column("nestedField", new TypeDesc(nestedRecordSchema)); + schema.addColumn(nestedField); + schema.addColumn("s4", Type.FLOAT8); + + Path path = new Path(CommonTestingUtil.getTestDir(), "table1"); + TableDesc meta = new TableDesc( + CatalogUtil.buildFQName(DEFAULT_DATABASE_NAME, "getTable"), + schema, + StoreType.CSV, + new KeyValueSet(), + path.toUri()); + + // schema creation + assertFalse(catalog.existsTable(DEFAULT_DATABASE_NAME, "getTable")); + catalog.createTable(meta); + assertTrue(catalog.existsTable(DEFAULT_DATABASE_NAME, "getTable")); + + schema.setQualifier(CatalogUtil.buildFQName(DEFAULT_DATABASE_NAME, "getTable")); // change it for the equals test. + TableDesc restored = catalog.getTableDesc(DEFAULT_DATABASE_NAME, "getTable"); +// assertEquals(schema, restored.getSchema()); + + // drop test + catalog.dropTable(CatalogUtil.buildFQName(DEFAULT_DATABASE_NAME, "getTable")); + assertFalse(catalog.existsTable(DEFAULT_DATABASE_NAME, "getTable")); + } + static IndexDesc desc1; static IndexDesc desc2; static IndexDesc desc3; diff --git a/tajo-common/src/main/proto/DataTypes.proto b/tajo-common/src/main/proto/DataTypes.proto index 73948f18b8..7d5adfdd67 100644 --- a/tajo-common/src/main/proto/DataTypes.proto +++ b/tajo-common/src/main/proto/DataTypes.proto @@ -101,13 +101,15 @@ enum Type { message DataType { required Type type = 1; optional int32 length = 2; + + /* Auxiliary information */ optional string code = 3; - /** - * Nested fields. Since Protobuf does not support nested fields, - * the nesting is flattened to a single list by a depth-first traversal. - * The children count is used to construct the nested relationship. - * This field is not set when the element is a primitive type - */ - optional int32 num_children = 4; + /** + * Nested fields. Since Protobuf does not support nested fields, + * the nesting is flattened to a single list by a depth-first traversal. + * The children count is used to construct the nested relationship. + * This field is not set when the element is a primitive type + */ + optional int32 num_children = 4; } diff --git a/tajo-core/src/test/java/org/apache/tajo/engine/query/TestCreateTable.java b/tajo-core/src/test/java/org/apache/tajo/engine/query/TestCreateTable.java index 21b3910eb2..a9528da8e1 100644 --- a/tajo-core/src/test/java/org/apache/tajo/engine/query/TestCreateTable.java +++ b/tajo-core/src/test/java/org/apache/tajo/engine/query/TestCreateTable.java @@ -600,4 +600,16 @@ public final void testCreateTableLike1() throws Exception { */ } } + + @Test + public final void testCreateTableWithNestedRecord() throws Exception { + executeString("CREATE DATABASE D9;").close(); + + assertTableNotExists("d9.nested_table"); + executeString("CREATE TABLE D9.nested_table (f1 int, nested_field record (f2 int4, f3 int8), f3 text);").close(); + assertTableExists("d9.nested_table"); + + executeString("DROP TABLE D9.nested_table"); + executeString("DROP DATABASE D9").close(); + } } diff --git a/tajo-core/src/test/resources/results/TestSelectQuery/testExplainSelect.result b/tajo-core/src/test/resources/results/TestSelectQuery/testExplainSelect.result index a6aa8f45fd..2dc746dc94 100644 --- a/tajo-core/src/test/resources/results/TestSelectQuery/testExplainSelect.result +++ b/tajo-core/src/test/resources/results/TestSelectQuery/testExplainSelect.result @@ -2,5 +2,5 @@ explain ------------------------------- SCAN(0) on default.lineitem => target list: default.lineitem.l_orderkey (INT4), default.lineitem.l_partkey (INT4) - => out schema: {(2) default.lineitem.l_orderkey (INT4),default.lineitem.l_partkey (INT4)} - => in schema: {(16) default.lineitem.l_orderkey (INT4),default.lineitem.l_partkey (INT4),default.lineitem.l_suppkey (INT4),default.lineitem.l_linenumber (INT4),default.lineitem.l_quantity (FLOAT8),default.lineitem.l_extendedprice (FLOAT8),default.lineitem.l_discount (FLOAT8),default.lineitem.l_tax (FLOAT8),default.lineitem.l_returnflag (TEXT),default.lineitem.l_linestatus (TEXT),default.lineitem.l_shipdate (TEXT),default.lineitem.l_commitdate (TEXT),default.lineitem.l_receiptdate (TEXT),default.lineitem.l_shipinstruct (TEXT),default.lineitem.l_shipmode (TEXT),default.lineitem.l_comment (TEXT)} \ No newline at end of file + => out schema: {(2) default.lineitem.l_orderkey (INT4), default.lineitem.l_partkey (INT4)} + => in schema: {(16) default.lineitem.l_orderkey (INT4), default.lineitem.l_partkey (INT4), default.lineitem.l_suppkey (INT4), default.lineitem.l_linenumber (INT4), default.lineitem.l_quantity (FLOAT8), default.lineitem.l_extendedprice (FLOAT8), default.lineitem.l_discount (FLOAT8), default.lineitem.l_tax (FLOAT8), default.lineitem.l_returnflag (TEXT), default.lineitem.l_linestatus (TEXT), default.lineitem.l_shipdate (TEXT), default.lineitem.l_commitdate (TEXT), default.lineitem.l_receiptdate (TEXT), default.lineitem.l_shipinstruct (TEXT), default.lineitem.l_shipmode (TEXT), default.lineitem.l_comment (TEXT)} \ No newline at end of file From 1c8d2585c1a13f16fd20acc5076b9fb1f734c91c Mon Sep 17 00:00:00 2001 From: Hyunsik Choi Date: Fri, 27 Feb 2015 04:10:05 -0800 Subject: [PATCH 05/12] TAJO-1329: Improve Schema class to support nested struct support. --- .../org/apache/tajo/catalog/CatalogUtil.java | 12 +- .../java/org/apache/tajo/catalog/Schema.java | 8 +- .../org/apache/tajo/catalog/SchemaUtil.java | 12 +- .../org/apache/tajo/catalog/TestSchema.java | 118 ++++++++++++------ .../org/apache/tajo/catalog/TestCatalog.java | 45 +++++-- 5 files changed, 127 insertions(+), 68 deletions(-) diff --git a/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/CatalogUtil.java b/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/CatalogUtil.java index b919f56c68..722be23441 100644 --- a/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/CatalogUtil.java +++ b/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/CatalogUtil.java @@ -327,15 +327,9 @@ public static PartitionMethodDesc newPartitionMethodDesc(CatalogProtos.Partition * @return */ public static SchemaProto getQualfiedSchema(String tableName, SchemaProto schema) { - SchemaProto.Builder revisedSchema = SchemaProto.newBuilder(schema); - revisedSchema.clearFields(); - for (ColumnProto col : schema.getFieldsList()) { - ColumnProto.Builder builder = ColumnProto.newBuilder(col); - builder.setName(tableName + CatalogConstants.IDENTIFIER_DELIMITER + extractSimpleName(col.getName())); - revisedSchema.addFields(builder.build()); - } - - return revisedSchema.build(); + Schema restored = new Schema(schema); + restored.setQualifier(tableName); + return restored.getProto(); } public static DataType newDataType(Type type, String code) { diff --git a/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/Schema.java b/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/Schema.java index 44a3e45d3f..bb4faf5e23 100644 --- a/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/Schema.java +++ b/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/Schema.java @@ -67,7 +67,7 @@ private static void deserializeColumn(List fields, List pro // Get the number of child fields int childNum = columnProto.getDataType().getNumChildren(); // where is start index of nested fields? - int childStartIndex = fields.size() - columnProto.getDataType().getNumChildren(); + int childStartIndex = fields.size() - childNum; // Extract nested fields List nestedColumns = TUtil.newList(fields.subList(childStartIndex, childStartIndex + childNum)); // Remove nested fields from the the current level @@ -368,14 +368,14 @@ public SchemaProto getProto() { return builder.build(); } - private static class SchemaProtoBuilder implements ColumnVisitor { + private static class SchemaProtoBuilder implements ColumnVisitor { private SchemaProto.Builder builder; public SchemaProtoBuilder(SchemaProto.Builder builder) { this.builder = builder; } @Override - public Column visit(int depth, Column column) { + public void visit(int depth, Column column) { if (column.getDataType().getType() == Type.RECORD) { DataType.Builder updatedType = DataType.newBuilder(column.getDataType()); @@ -388,8 +388,6 @@ public Column visit(int depth, Column column) { } else { builder.addFields(column.getProto()); } - - return column; } } diff --git a/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/SchemaUtil.java b/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/SchemaUtil.java index b1600d6116..0a8a71b7e4 100644 --- a/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/SchemaUtil.java +++ b/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/SchemaUtil.java @@ -114,21 +114,17 @@ public static T clone(Schema schema) { return names; } - public static interface ColumnVisitor { - public R visit(int depth, Column column); + public static interface ColumnVisitor { + public void visit(int depth, Column column); } - public static interface ColumnProtoVisitor { - public R visit(int depth, ColumnProto column); - } - - public static void visitSchema(Schema schema, ColumnVisitor function) { + public static void visitSchema(Schema schema, ColumnVisitor function) { for(Column col : schema.getColumns()) { visitInDepthFirstOrder(0, function, col); } } - private static void visitInDepthFirstOrder(int depth, ColumnVisitor function, Column column) { + private static void visitInDepthFirstOrder(int depth, ColumnVisitor function, Column column) { if (column.getDataType().getType() == Type.RECORD) { for (Column nestedColumn : column.typeDesc.nestedRecordSchema.getColumns()) { visitInDepthFirstOrder(depth + 1, function, nestedColumn); diff --git a/tajo-catalog/tajo-catalog-common/src/test/java/org/apache/tajo/catalog/TestSchema.java b/tajo-catalog/tajo-catalog-common/src/test/java/org/apache/tajo/catalog/TestSchema.java index 4d6b78d00f..01c31241e2 100644 --- a/tajo-catalog/tajo-catalog-common/src/test/java/org/apache/tajo/catalog/TestSchema.java +++ b/tajo-catalog/tajo-catalog-common/src/test/java/org/apache/tajo/catalog/TestSchema.java @@ -34,6 +34,87 @@ public class TestSchema { Column col2; Column col3; + public static final Schema nestedSchema1; + public static final Schema nestedSchema2; + public static final Schema nestedSchema3; + + static { + // simple nested schema + nestedSchema1 = new Schema(); + nestedSchema1.addColumn("s1", Type.INT8); + + Schema nestedRecordSchema = new Schema(); + nestedRecordSchema.addColumn("s2", Type.FLOAT4); + nestedRecordSchema.addColumn("s3", Type.TEXT); + + Column nestedField = new Column("s4", new TypeDesc(nestedRecordSchema)); + nestedSchema1.addColumn(nestedField); + + nestedSchema1.addColumn("s5", Type.FLOAT8); + + // two level nested schema + // + // s1 + // |- s2 + // |- s4 + // |- s3 + // |- s4 + // |- s5 + // |- s8 + // |- s6 + // |- s7 + nestedSchema2 = new Schema(); + nestedSchema2.addColumn("s1", Type.INT8); + + Schema nestedRecordSchema1 = new Schema(); + nestedRecordSchema1.addColumn("s2", Type.FLOAT4); + nestedRecordSchema1.addColumn("s3", Type.TEXT); + + Column nestedField1 = new Column("s4", new TypeDesc(nestedRecordSchema1)); + nestedSchema2.addColumn(nestedField1); + + nestedSchema2.addColumn("s5", Type.FLOAT8); + + Schema nestedRecordSchema2 = new Schema(); + nestedRecordSchema2.addColumn("s6", Type.FLOAT4); + nestedRecordSchema2.addColumn("s7", Type.TEXT); + + Column nestedField2 = new Column("s8", new TypeDesc(nestedRecordSchema2)); + nestedSchema2.addColumn(nestedField2); + + + // three level nested schema + // + // s1 + // |- s2 + // |- s3 + // |- s4 + // |- s7 + // |- s5 + // |- s6 + // |- s8 + // |- s9 + + nestedSchema3 = new Schema(); + nestedSchema3.addColumn("s1", Type.INT8); + + nestedSchema3.addColumn("s2", Type.INT8); + + Schema s5 = new Schema(); + s5.addColumn("s6", Type.INT8); + + Schema s7 = new Schema(); + s7.addColumn("s5", new TypeDesc(s5)); + + Schema s3 = new Schema(); + s3.addColumn("s4", Type.INT8); + s3.addColumn("s7", new TypeDesc(s7)); + s3.addColumn("s8", Type.INT8); + + nestedSchema3.addColumn("s3", new TypeDesc(s3)); + nestedSchema3.addColumn("s9", Type.INT8); + } + @Before public void setUp() throws Exception { schema = new Schema(); @@ -171,45 +252,12 @@ public final void testSetQualifier() { @Test public void testNestedRecord1() { - Schema s1 = new Schema(); - s1.addColumn("s1", Type.INT8); - - Schema nestedRecordSchema = new Schema(); - nestedRecordSchema.addColumn("s2", Type.FLOAT4); - nestedRecordSchema.addColumn("s3", Type.TEXT); - - Column nestedField = new Column("nestedField", new TypeDesc(nestedRecordSchema)); - s1.addColumn(nestedField); - - s1.addColumn("s4", Type.FLOAT8); - - verifySchema(s1); + verifySchema(nestedSchema1); } @Test public void testNestedRecord2() { - // for two level nested schema and the same column names - - Schema schema = new Schema(); - schema.addColumn("s1", Type.INT8); - - Schema nestedRecordSchema1 = new Schema(); - nestedRecordSchema1.addColumn("s2", Type.FLOAT4); - nestedRecordSchema1.addColumn("s3", Type.TEXT); - - Schema nestedRecordSchema2 = new Schema(); - nestedRecordSchema2.addColumn("s2", Type.FLOAT4); - nestedRecordSchema2.addColumn("s3", Type.TEXT); - - Column nestedField1 = new Column("nestedField1", new TypeDesc(nestedRecordSchema1)); - schema.addColumn(nestedField1); - - schema.addColumn("s4", Type.FLOAT8); - - Column nestedField2 = new Column("nestedField2", new TypeDesc(nestedRecordSchema2)); - schema.addColumn(nestedField2); - - verifySchema(schema); + verifySchema(nestedSchema2); } public static void verifySchema(Schema s1) { diff --git a/tajo-catalog/tajo-catalog-server/src/test/java/org/apache/tajo/catalog/TestCatalog.java b/tajo-catalog/tajo-catalog-server/src/test/java/org/apache/tajo/catalog/TestCatalog.java index 79e802fe44..048489bea1 100644 --- a/tajo-catalog/tajo-catalog-server/src/test/java/org/apache/tajo/catalog/TestCatalog.java +++ b/tajo-catalog/tajo-catalog-server/src/test/java/org/apache/tajo/catalog/TestCatalog.java @@ -434,19 +434,41 @@ public void testGetTable() throws Exception { @Test public void testCreateAndGetNestedTable1() throws Exception { // schema creation - Schema schema = new Schema(); - schema.addColumn("s1", Type.INT8); - Schema nestedRecordSchema = new Schema(); - nestedRecordSchema.addColumn("s2", Type.FLOAT4); - nestedRecordSchema.addColumn("s3", Type.TEXT); - Column nestedField = new Column("nestedField", new TypeDesc(nestedRecordSchema)); - schema.addColumn(nestedField); - schema.addColumn("s4", Type.FLOAT8); + // three level nested schema + // + // s1 + // |- s2 + // |- s3 + // |- s4 + // |- s7 + // |- s5 + // |- s6 + // |- s8 + // |- s9 + + Schema nestedSchema = new Schema(); + nestedSchema.addColumn("s1", Type.INT8); + + nestedSchema.addColumn("s2", Type.INT8); + + Schema s5 = new Schema(); + s5.addColumn("s6", Type.INT8); + + Schema s7 = new Schema(); + s7.addColumn("s5", new TypeDesc(s5)); + + Schema s3 = new Schema(); + s3.addColumn("s4", Type.INT8); + s3.addColumn("s7", new TypeDesc(s7)); + s3.addColumn("s8", Type.INT8); + + nestedSchema.addColumn("s3", new TypeDesc(s3)); + nestedSchema.addColumn("s9", Type.INT8); Path path = new Path(CommonTestingUtil.getTestDir(), "table1"); TableDesc meta = new TableDesc( CatalogUtil.buildFQName(DEFAULT_DATABASE_NAME, "getTable"), - schema, + nestedSchema, StoreType.CSV, new KeyValueSet(), path.toUri()); @@ -456,9 +478,10 @@ public void testCreateAndGetNestedTable1() throws Exception { catalog.createTable(meta); assertTrue(catalog.existsTable(DEFAULT_DATABASE_NAME, "getTable")); - schema.setQualifier(CatalogUtil.buildFQName(DEFAULT_DATABASE_NAME, "getTable")); // change it for the equals test. + // change it for the equals test. + nestedSchema.setQualifier(CatalogUtil.buildFQName(DEFAULT_DATABASE_NAME, "getTable")); TableDesc restored = catalog.getTableDesc(DEFAULT_DATABASE_NAME, "getTable"); -// assertEquals(schema, restored.getSchema()); + assertEquals(nestedSchema, restored.getSchema()); // drop test catalog.dropTable(CatalogUtil.buildFQName(DEFAULT_DATABASE_NAME, "getTable")); From 0ef60183c48f77bd03ac3eb9ef80d62dbe66bf0e Mon Sep 17 00:00:00 2001 From: Hyunsik Choi Date: Fri, 27 Feb 2015 11:52:32 -0800 Subject: [PATCH 06/12] Change child_fields_num to nested_field_num. * Add more comments. * Clean up some codes. --- .../org/apache/tajo/algebra/DataTypeExpr.java | 5 ++- .../org/apache/tajo/catalog/CatalogUtil.java | 11 +++++-- .../java/org/apache/tajo/catalog/Schema.java | 33 ++++++++++++++----- .../org/apache/tajo/catalog/SchemaUtil.java | 20 ++++++++--- .../org/apache/tajo/catalog/TypeDesc.java | 3 ++ .../dictionary/ColumnsTableDescriptor.java | 8 ++--- .../tajo/catalog/store/AbstractDBStore.java | 8 +++-- tajo-common/src/main/proto/DataTypes.proto | 2 +- 8 files changed, 62 insertions(+), 28 deletions(-) diff --git a/tajo-algebra/src/main/java/org/apache/tajo/algebra/DataTypeExpr.java b/tajo-algebra/src/main/java/org/apache/tajo/algebra/DataTypeExpr.java index 96c9adbd7a..b280397e27 100644 --- a/tajo-algebra/src/main/java/org/apache/tajo/algebra/DataTypeExpr.java +++ b/tajo-algebra/src/main/java/org/apache/tajo/algebra/DataTypeExpr.java @@ -32,7 +32,7 @@ public class DataTypeExpr extends Expr { @Expose @SerializedName("Scale") Integer scale; @Expose @SerializedName("Record") - ColumnDefinition [] nestedRecordTypes; + ColumnDefinition [] nestedRecordTypes; // not null if the type is RECORD public DataTypeExpr(String typeName) { super(OpType.DataType); @@ -41,8 +41,7 @@ public DataTypeExpr(String typeName) { public DataTypeExpr(ColumnDefinition [] nestedRecordTypes) { super(OpType.DataType); - // Please refer to DataTypes.proto. 'STRUCT' must be equivalent to Enum type in DataTypes.proto. - // STRUCT = 51; + // RECORD = 51 in DataTypes.proto this.typeName = Type.RECORD.name(); this.nestedRecordTypes = nestedRecordTypes; } diff --git a/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/CatalogUtil.java b/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/CatalogUtil.java index 722be23441..afcff2d36f 100644 --- a/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/CatalogUtil.java +++ b/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/CatalogUtil.java @@ -318,8 +318,7 @@ public static PartitionMethodDesc newPartitionMethodDesc(CatalogProtos.Partition } /** - * This method transforms the unqualified names of a given schema into - * the qualified names. + * This method transforms the unqualified names of a schema to the qualified names. * * @param tableName a table name to be prefixed * @param schema a schema to be transformed @@ -348,10 +347,16 @@ public static DataType newSimpleDataType(Type type) { return DataType.newBuilder().setType(type).build(); } + /** + * Create a record type + * + * @param nestedFieldNum The number of nested fields + * @return RECORD DataType + */ public static DataType newRecordType(int nestedFieldNum) { DataType.Builder builder = DataType.newBuilder(); builder.setType(Type.RECORD); - builder.setNumChildren(nestedFieldNum); + builder.setNumNestedFields(nestedFieldNum); return builder.build(); } diff --git a/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/Schema.java b/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/Schema.java index bb4faf5e23..5e903b0a03 100644 --- a/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/Schema.java +++ b/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/Schema.java @@ -46,7 +46,14 @@ public class Schema implements ProtoObject, Cloneable, GsonObject { public Schema() { init(); } - + + /** + * This Schema constructor restores a serialized schema into in-memory Schema structure. + * A serialized schema is an ordered list in depth-first order over a nested schema. + * This constructor transforms the list into a tree-like structure. + * + * @param proto + */ public Schema(SchemaProto proto) { init(); @@ -60,23 +67,31 @@ public Schema(SchemaProto proto) { } } - private static void deserializeColumn(List fields, List protos, int serializedColumnIndex) { + /** + * This method transforms a list of ColumnProtos into a schema tree. + * It assumes that protos contains a list of ColumnProtos in the depth-first order. + * + * @param tobeAdded + * @param protos + * @param serializedColumnIndex + */ + private static void deserializeColumn(List tobeAdded, List protos, int serializedColumnIndex) { ColumnProto columnProto = protos.get(serializedColumnIndex); if (columnProto.getDataType().getType() == Type.RECORD) { // Get the number of child fields - int childNum = columnProto.getDataType().getNumChildren(); + int childNum = columnProto.getDataType().getNumNestedFields(); // where is start index of nested fields? - int childStartIndex = fields.size() - childNum; + int childStartIndex = tobeAdded.size() - childNum; // Extract nested fields - List nestedColumns = TUtil.newList(fields.subList(childStartIndex, childStartIndex + childNum)); + List nestedColumns = TUtil.newList(tobeAdded.subList(childStartIndex, childStartIndex + childNum)); // Remove nested fields from the the current level - fields.removeAll(nestedColumns); + tobeAdded.removeAll(nestedColumns); // Add the nested fields to the list as a single record column - fields.add(new Column(columnProto.getName(), new TypeDesc(new Schema(nestedColumns)))); + tobeAdded.add(new Column(columnProto.getName(), new TypeDesc(new Schema(nestedColumns)))); } else { - fields.add(new Column(protos.get(serializedColumnIndex))); + tobeAdded.add(new Column(protos.get(serializedColumnIndex))); } } @@ -379,7 +394,7 @@ public void visit(int depth, Column column) { if (column.getDataType().getType() == Type.RECORD) { DataType.Builder updatedType = DataType.newBuilder(column.getDataType()); - updatedType.setNumChildren(column.typeDesc.nestedRecordSchema.size()); + updatedType.setNumNestedFields(column.typeDesc.nestedRecordSchema.size()); ColumnProto.Builder updatedColumn = ColumnProto.newBuilder(column.getProto()); updatedColumn.setDataType(updatedType); diff --git a/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/SchemaUtil.java b/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/SchemaUtil.java index 0a8a71b7e4..1b9e0ae478 100644 --- a/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/SchemaUtil.java +++ b/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/SchemaUtil.java @@ -18,11 +18,6 @@ package org.apache.tajo.catalog; -import org.apache.tajo.catalog.proto.CatalogProtos; -import org.apache.tajo.catalog.proto.CatalogProtos.ColumnProto; - -import java.util.List; - import static org.apache.tajo.common.TajoDataTypes.DataType; import static org.apache.tajo.common.TajoDataTypes.Type; @@ -114,16 +109,31 @@ public static T clone(Schema schema) { return names; } + /** + * Column visitor interface + */ public static interface ColumnVisitor { public void visit(int depth, Column column); } + /** + * It allows a column visitor to traverse all columns in a schema in a depth-first order. + * @param schema + * @param function + */ public static void visitSchema(Schema schema, ColumnVisitor function) { for(Column col : schema.getColumns()) { visitInDepthFirstOrder(0, function, col); } } + /** + * A recursive function to traverse all columns in a schema in a depth-first order. + * + * @param depth Nested depth. 0 is root column. + * @param function Visitor + * @param column Current visiting column + */ private static void visitInDepthFirstOrder(int depth, ColumnVisitor function, Column column) { if (column.getDataType().getType() == Type.RECORD) { for (Column nestedColumn : column.typeDesc.nestedRecordSchema.getColumns()) { diff --git a/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/TypeDesc.java b/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/TypeDesc.java index d82874b571..7956c708b8 100644 --- a/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/TypeDesc.java +++ b/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/TypeDesc.java @@ -24,6 +24,9 @@ import org.apache.tajo.common.TajoDataTypes.Type; import org.apache.tajo.util.TUtil; +/** + * Type Description for a column + */ public class TypeDesc { @Expose protected DataType dataType; @Expose protected Schema nestedRecordSchema; // NULL unless type is RECORD. diff --git a/tajo-catalog/tajo-catalog-server/src/main/java/org/apache/tajo/catalog/dictionary/ColumnsTableDescriptor.java b/tajo-catalog/tajo-catalog-server/src/main/java/org/apache/tajo/catalog/dictionary/ColumnsTableDescriptor.java index f024175c86..9ab65ec835 100644 --- a/tajo-catalog/tajo-catalog-server/src/main/java/org/apache/tajo/catalog/dictionary/ColumnsTableDescriptor.java +++ b/tajo-catalog/tajo-catalog-server/src/main/java/org/apache/tajo/catalog/dictionary/ColumnsTableDescriptor.java @@ -24,10 +24,10 @@ class ColumnsTableDescriptor extends AbstractTableDescriptor { private static final String TABLENAME = "columns"; private final ColumnDescriptor[] columns = new ColumnDescriptor[] { - new ColumnDescriptor("tid", Type.INT4, 0), - new ColumnDescriptor("column_name", Type.TEXT, 0), - new ColumnDescriptor("ordinal_position", Type.INT4, 0), - new ColumnDescriptor("nested_field_num", Type.INT4, 0), + new ColumnDescriptor("tid", Type.INT4, 0), // just key for DBMS + new ColumnDescriptor("column_name", Type.TEXT, 0), // column name + new ColumnDescriptor("ordinal_position", Type.INT4, 0), // the ordinal position in a schema + new ColumnDescriptor("nested_field_num", Type.INT4, 0), // the number of child nested fields new ColumnDescriptor("data_type", Type.TEXT, 0), new ColumnDescriptor("type_length", Type.INT4, 0) }; diff --git a/tajo-catalog/tajo-catalog-server/src/main/java/org/apache/tajo/catalog/store/AbstractDBStore.java b/tajo-catalog/tajo-catalog-server/src/main/java/org/apache/tajo/catalog/store/AbstractDBStore.java index dad482ffc5..208d8f1c91 100644 --- a/tajo-catalog/tajo-catalog-server/src/main/java/org/apache/tajo/catalog/store/AbstractDBStore.java +++ b/tajo-catalog/tajo-catalog-server/src/main/java/org/apache/tajo/catalog/store/AbstractDBStore.java @@ -803,7 +803,8 @@ public void createTable(final CatalogProtos.TableDescProto table) throws Catalog String colSql = "INSERT INTO " + TB_COLUMNS + // 1 2 3 4 5 6 - " (TID, COLUMN_NAME, ORDINAL_POSITION, NESTED_FIELD_NUM, DATA_TYPE, TYPE_LENGTH) VALUES(?, ?, ?, ?, ?, ?) "; + " (TID, COLUMN_NAME, ORDINAL_POSITION, NESTED_FIELD_NUM, DATA_TYPE, TYPE_LENGTH)" + + " VALUES(?, ?, ?, ?, ?, ?) "; if (LOG.isDebugEnabled()) { LOG.debug(colSql); @@ -817,7 +818,8 @@ public void createTable(final CatalogProtos.TableDescProto table) throws Catalog pstmt.setInt(1, tableId); pstmt.setString(2, CatalogUtil.extractSimpleName(col.getName())); pstmt.setInt(3, i); - pstmt.setInt(4, dataType.hasNumChildren() ? dataType.getNumChildren() : 0); + // the default number of nested fields is 0. + pstmt.setInt(4, dataType.hasNumNestedFields() ? dataType.getNumNestedFields() : 0); pstmt.setString(5, dataType.getType().name()); pstmt.setInt(6, (col.getDataType().hasLength() ? col.getDataType().getLength() : 0)); pstmt.addBatch(); @@ -1144,7 +1146,7 @@ private void addNewColumn(int tableId, CatalogProtos.ColumnProto columnProto) th pstmt.setInt(1, tableId); pstmt.setString(2, CatalogUtil.extractSimpleName(columnProto.getName())); pstmt.setInt(3, position + 1); - pstmt.setInt(4, dataType.hasNumChildren() ? dataType.getNumChildren() : 0); + pstmt.setInt(4, dataType.hasNumNestedFields() ? dataType.getNumNestedFields() : 0); pstmt.setString(5, dataType.getType().name()); pstmt.setInt(6, (columnProto.getDataType().hasLength() ? columnProto.getDataType().getLength() : 0)); pstmt.executeUpdate(); diff --git a/tajo-common/src/main/proto/DataTypes.proto b/tajo-common/src/main/proto/DataTypes.proto index 7d5adfdd67..fc5ac9a84d 100644 --- a/tajo-common/src/main/proto/DataTypes.proto +++ b/tajo-common/src/main/proto/DataTypes.proto @@ -111,5 +111,5 @@ message DataType { * The children count is used to construct the nested relationship. * This field is not set when the element is a primitive type */ - optional int32 num_children = 4; + optional int32 num_nested_fields = 4; } From a2d07415faa224849786954eeb47ebc7c3d90d36 Mon Sep 17 00:00:00 2001 From: Hyunsik Choi Date: Thu, 5 Mar 2015 13:08:10 -0800 Subject: [PATCH 07/12] Allow catalog stores to have duplicated names across the different levels. --- .../tajo/catalog/store/AbstractDBStore.java | 4 +- .../main/resources/schemas/derby/derby.xml | 2 +- .../resources/schemas/mariadb/columns.sql | 2 +- .../main/resources/schemas/mysql/columns.sql | 2 +- .../main/resources/schemas/oracle/columns.sql | 2 +- .../resources/schemas/postgresql/columns.sql | 2 +- .../org/apache/tajo/catalog/TestCatalog.java | 78 +++++++++++++++---- 7 files changed, 68 insertions(+), 24 deletions(-) diff --git a/tajo-catalog/tajo-catalog-server/src/main/java/org/apache/tajo/catalog/store/AbstractDBStore.java b/tajo-catalog/tajo-catalog-server/src/main/java/org/apache/tajo/catalog/store/AbstractDBStore.java index 208d8f1c91..76bf2075ea 100644 --- a/tajo-catalog/tajo-catalog-server/src/main/java/org/apache/tajo/catalog/store/AbstractDBStore.java +++ b/tajo-catalog/tajo-catalog-server/src/main/java/org/apache/tajo/catalog/store/AbstractDBStore.java @@ -1116,8 +1116,7 @@ private void addNewColumn(int tableId, CatalogProtos.ColumnProto columnProto) th "INSERT INTO " + TB_COLUMNS + " (TID, COLUMN_NAME, ORDINAL_POSITION, NESTED_FIELD_NUM, DATA_TYPE, TYPE_LENGTH) VALUES(?, ?, ?, ?, ?, ?) "; final String columnCountSql = - "SELECT COLUMN_NAME, MAX(ORDINAL_POSITION) AS POSITION FROM " + TB_COLUMNS + - " WHERE TID = ? GROUP BY COLUMN_NAME"; + "SELECT MAX(ORDINAL_POSITION) AS POSITION FROM " + TB_COLUMNS + " WHERE TID = ?"; if (LOG.isDebugEnabled()) { LOG.debug(insertNewColumnSql); @@ -1134,6 +1133,7 @@ private void addNewColumn(int tableId, CatalogProtos.ColumnProto columnProto) th pstmt.setInt(1 , tableId); resultSet = pstmt.executeQuery(); + // get the last the ordinal position. int position = resultSet.next() ? resultSet.getInt("POSITION") : 0; resultSet.close(); diff --git a/tajo-catalog/tajo-catalog-server/src/main/resources/schemas/derby/derby.xml b/tajo-catalog/tajo-catalog-server/src/main/resources/schemas/derby/derby.xml index 9a271e66f6..1795cfb805 100644 --- a/tajo-catalog/tajo-catalog-server/src/main/resources/schemas/derby/derby.xml +++ b/tajo-catalog/tajo-catalog-server/src/main/resources/schemas/derby/derby.xml @@ -75,7 +75,7 @@ NESTED_FIELD_NUM INTEGER NOT NULL, DATA_TYPE CHAR(16), TYPE_LENGTH INTEGER, - CONSTRAINT COLUMNS_PK PRIMARY KEY (TID, COLUMN_NAME) + CONSTRAINT COLUMNS_PK PRIMARY KEY (TID, ORDINAL_POSITION) )]]> diff --git a/tajo-catalog/tajo-catalog-server/src/main/resources/schemas/mariadb/columns.sql b/tajo-catalog/tajo-catalog-server/src/main/resources/schemas/mariadb/columns.sql index 16eed0212d..5b8ed26ff6 100644 --- a/tajo-catalog/tajo-catalog-server/src/main/resources/schemas/mariadb/columns.sql +++ b/tajo-catalog/tajo-catalog-server/src/main/resources/schemas/mariadb/columns.sql @@ -5,6 +5,6 @@ CREATE TABLE COLUMNS ( NESTED_FIELD_NUM INT NOT NULL, DATA_TYPE CHAR(16), TYPE_LENGTH INTEGER, - PRIMARY KEY (TID, COLUMN_NAME), + PRIMARY KEY (TID, ORDINAL_POSITION), FOREIGN KEY (TID) REFERENCES TABLES (TID) ON DELETE CASCADE ) diff --git a/tajo-catalog/tajo-catalog-server/src/main/resources/schemas/mysql/columns.sql b/tajo-catalog/tajo-catalog-server/src/main/resources/schemas/mysql/columns.sql index 16eed0212d..5b8ed26ff6 100644 --- a/tajo-catalog/tajo-catalog-server/src/main/resources/schemas/mysql/columns.sql +++ b/tajo-catalog/tajo-catalog-server/src/main/resources/schemas/mysql/columns.sql @@ -5,6 +5,6 @@ CREATE TABLE COLUMNS ( NESTED_FIELD_NUM INT NOT NULL, DATA_TYPE CHAR(16), TYPE_LENGTH INTEGER, - PRIMARY KEY (TID, COLUMN_NAME), + PRIMARY KEY (TID, ORDINAL_POSITION), FOREIGN KEY (TID) REFERENCES TABLES (TID) ON DELETE CASCADE ) diff --git a/tajo-catalog/tajo-catalog-server/src/main/resources/schemas/oracle/columns.sql b/tajo-catalog/tajo-catalog-server/src/main/resources/schemas/oracle/columns.sql index 9a5e94018a..eb3d79a34c 100644 --- a/tajo-catalog/tajo-catalog-server/src/main/resources/schemas/oracle/columns.sql +++ b/tajo-catalog/tajo-catalog-server/src/main/resources/schemas/oracle/columns.sql @@ -5,6 +5,6 @@ CREATE TABLE COLUMNS ( NESTED_FIELD_NUM INT NOT NULL, DATA_TYPE CHAR(16), TYPE_LENGTH INTEGER, - CONSTRAINT COLUMNS_PKEY PRIMARY KEY (TID, COLUMN_NAME), + CONSTRAINT COLUMNS_PKEY PRIMARY KEY (TID, ORDINAL_POSITION), FOREIGN KEY (TID) REFERENCES TABLES (TID) ON DELETE CASCADE ) \ No newline at end of file diff --git a/tajo-catalog/tajo-catalog-server/src/main/resources/schemas/postgresql/columns.sql b/tajo-catalog/tajo-catalog-server/src/main/resources/schemas/postgresql/columns.sql index c490970401..0adb8ceb81 100644 --- a/tajo-catalog/tajo-catalog-server/src/main/resources/schemas/postgresql/columns.sql +++ b/tajo-catalog/tajo-catalog-server/src/main/resources/schemas/postgresql/columns.sql @@ -5,6 +5,6 @@ CREATE TABLE COLUMNS ( NESTED_FIELD_NUM INT NOT NULL, DATA_TYPE CHAR(16), TYPE_LENGTH INTEGER, - CONSTRAINT COLUMNS_PKEY PRIMARY KEY (TID, COLUMN_NAME), + CONSTRAINT COLUMNS_PKEY PRIMARY KEY (TID, ORDINAL_POSITION), FOREIGN KEY (TID) REFERENCES TABLES (TID) ON DELETE CASCADE ) \ No newline at end of file diff --git a/tajo-catalog/tajo-catalog-server/src/test/java/org/apache/tajo/catalog/TestCatalog.java b/tajo-catalog/tajo-catalog-server/src/test/java/org/apache/tajo/catalog/TestCatalog.java index 048489bea1..c3bfc99bb0 100644 --- a/tajo-catalog/tajo-catalog-server/src/test/java/org/apache/tajo/catalog/TestCatalog.java +++ b/tajo-catalog/tajo-catalog-server/src/test/java/org/apache/tajo/catalog/TestCatalog.java @@ -431,6 +431,33 @@ public void testGetTable() throws Exception { assertFalse(catalog.existsTable(DEFAULT_DATABASE_NAME, "getTable")); } + /** + * It asserts the equality between an original table desc and a restored table desc. + */ + private static void assertSchemaEquality(String tableName, Schema schema) throws IOException { + Path path = new Path(CommonTestingUtil.getTestDir(), tableName); + TableDesc tableDesc = new TableDesc( + CatalogUtil.buildFQName(DEFAULT_DATABASE_NAME, tableName), + schema, + StoreType.CSV, + new KeyValueSet(), + path.toUri()); + + // schema creation + assertFalse(catalog.existsTable(DEFAULT_DATABASE_NAME, tableName)); + catalog.createTable(tableDesc); + assertTrue(catalog.existsTable(DEFAULT_DATABASE_NAME, tableName)); + + // change it for the equals test. + schema.setQualifier(CatalogUtil.buildFQName(DEFAULT_DATABASE_NAME, tableName)); + TableDesc restored = catalog.getTableDesc(DEFAULT_DATABASE_NAME, tableName); + assertEquals(schema, restored.getSchema()); + + // drop test + catalog.dropTable(CatalogUtil.buildFQName(DEFAULT_DATABASE_NAME, tableName)); + assertFalse(catalog.existsTable(DEFAULT_DATABASE_NAME, tableName)); + } + @Test public void testCreateAndGetNestedTable1() throws Exception { // schema creation @@ -465,27 +492,44 @@ public void testCreateAndGetNestedTable1() throws Exception { nestedSchema.addColumn("s3", new TypeDesc(s3)); nestedSchema.addColumn("s9", Type.INT8); - Path path = new Path(CommonTestingUtil.getTestDir(), "table1"); - TableDesc meta = new TableDesc( - CatalogUtil.buildFQName(DEFAULT_DATABASE_NAME, "getTable"), - nestedSchema, - StoreType.CSV, - new KeyValueSet(), - path.toUri()); + assertSchemaEquality("nested_schema1", nestedSchema); + } + @Test + public void testCreateAndGetNestedTable2() throws Exception { // schema creation - assertFalse(catalog.existsTable(DEFAULT_DATABASE_NAME, "getTable")); - catalog.createTable(meta); - assertTrue(catalog.existsTable(DEFAULT_DATABASE_NAME, "getTable")); + // three level nested schema + // + // s1 + // |- s2 + // |- s3 + // |- s1 + // |- s2 + // |- s3 + // |- s1 + // |- s3 + // |- s4 - // change it for the equals test. - nestedSchema.setQualifier(CatalogUtil.buildFQName(DEFAULT_DATABASE_NAME, "getTable")); - TableDesc restored = catalog.getTableDesc(DEFAULT_DATABASE_NAME, "getTable"); - assertEquals(nestedSchema, restored.getSchema()); + Schema nestedSchema = new Schema(); + nestedSchema.addColumn("s1", Type.INT8); - // drop test - catalog.dropTable(CatalogUtil.buildFQName(DEFAULT_DATABASE_NAME, "getTable")); - assertFalse(catalog.existsTable(DEFAULT_DATABASE_NAME, "getTable")); + nestedSchema.addColumn("s2", Type.INT8); + + Schema s5 = new Schema(); + s5.addColumn("s6", Type.INT8); + + Schema s7 = new Schema(); + s7.addColumn("s5", new TypeDesc(s5)); + + Schema s3 = new Schema(); + s3.addColumn("s4", Type.INT8); + s3.addColumn("s7", new TypeDesc(s7)); + s3.addColumn("s8", Type.INT8); + + nestedSchema.addColumn("s3", new TypeDesc(s3)); + nestedSchema.addColumn("s9", Type.INT8); + + assertSchemaEquality("nested_schema2", nestedSchema); } static IndexDesc desc1; From 8c000ce9ee53c5547f80f6ad412b5f2a560e1e37 Mon Sep 17 00:00:00 2001 From: Hyunsik Choi Date: Thu, 5 Mar 2015 22:45:31 -0800 Subject: [PATCH 08/12] Fixed unique key of derby. --- .../src/main/resources/schemas/derby/derby.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tajo-catalog/tajo-catalog-server/src/main/resources/schemas/derby/derby.xml b/tajo-catalog/tajo-catalog-server/src/main/resources/schemas/derby/derby.xml index 1795cfb805..5ac6394be2 100644 --- a/tajo-catalog/tajo-catalog-server/src/main/resources/schemas/derby/derby.xml +++ b/tajo-catalog/tajo-catalog-server/src/main/resources/schemas/derby/derby.xml @@ -80,7 +80,7 @@ - + Date: Wed, 11 Mar 2015 03:03:11 -0700 Subject: [PATCH 09/12] Add missed nested field support. * Improve tajo-dump to support nested schema * enable \d command to show nested schema --- .../java/org/apache/tajo/catalog/Column.java | 9 +++++ .../org/apache/tajo/catalog/DDLBuilder.java | 9 ++--- .../org/apache/tajo/catalog/SchemaUtil.java | 6 +++ .../org/apache/tajo/catalog/TypeDesc.java | 2 +- .../cli/tsql/commands/DescTableCommand.java | 5 +-- .../apache/tajo/cli/tools/TestTajoDump.java | 21 ++++++++++ .../org/apache/tajo/cli/tsql/TestTajoCli.java | 40 +++++++++++++------ .../testDescTableForNestedSchema.result | 29 ++++++++++++++ .../results/TestTajoDump/testDump2.result | 16 ++++++++ 9 files changed, 114 insertions(+), 23 deletions(-) create mode 100644 tajo-core/src/test/resources/results/TestTajoCli/testDescTableForNestedSchema.result create mode 100644 tajo-core/src/test/resources/results/TestTajoDump/testDump2.result diff --git a/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/Column.java b/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/Column.java index 5fc87de10b..12edaa74cb 100644 --- a/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/Column.java +++ b/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/Column.java @@ -110,6 +110,15 @@ public String getSimpleName() { return CatalogUtil.extractSimpleName(name); } + /** + * Return type description + * + * @return TypeDesc + */ + public TypeDesc getTypeDesc() { + return this.typeDesc; + } + /** * * @return DataType which includes domain type and scale. diff --git a/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/DDLBuilder.java b/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/DDLBuilder.java index 978092ac1b..1a59e88e8e 100644 --- a/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/DDLBuilder.java +++ b/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/DDLBuilder.java @@ -69,7 +69,7 @@ public static String buildDDLForBaseTable(TableDesc desc) { return sb.toString(); } - private static void buildSchema(StringBuilder sb, Schema schema) { + public static void buildSchema(StringBuilder sb, Schema schema) { boolean first = true; sb.append(" ("); @@ -81,11 +81,8 @@ private static void buildSchema(StringBuilder sb, Schema schema) { } sb.append(CatalogUtil.denormalizeIdentifier(column.getSimpleName())).append(" "); - TajoDataTypes.DataType dataType = column.getDataType(); - sb.append(dataType.getType().name()); - if (column.getDataType().hasLength() && column.getDataType().getLength() > 0) { - sb.append(" (").append(column.getDataType().getLength()).append(")"); - } + TypeDesc typeDesc = column.getTypeDesc(); + sb.append(typeDesc); } sb.append(")"); } diff --git a/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/SchemaUtil.java b/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/SchemaUtil.java index 1b9e0ae478..f2bb71cd5b 100644 --- a/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/SchemaUtil.java +++ b/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/SchemaUtil.java @@ -144,4 +144,10 @@ private static void visitInDepthFirstOrder(int depth, ColumnVisitor function, Co function.visit(depth, column); } } + + public static String toDisplayString(Schema schema) { + StringBuilder sb = new StringBuilder(); + DDLBuilder.buildSchema(sb, schema); + return sb.toString(); + } } diff --git a/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/TypeDesc.java b/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/TypeDesc.java index 7956c708b8..3bd0f006a6 100644 --- a/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/TypeDesc.java +++ b/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/TypeDesc.java @@ -63,7 +63,7 @@ public String toString() { StringBuilder sb = new StringBuilder(); if (dataType.getType() == Type.RECORD) { - sb.append("RECORD (").append(nestedRecordSchema.toString()).append(")"); + sb.append("RECORD").append(SchemaUtil.toDisplayString(nestedRecordSchema)).append(""); } else { sb.append(dataType.getType().name()); if (dataType.getLength() > 0) { diff --git a/tajo-cli/src/main/java/org/apache/tajo/cli/tsql/commands/DescTableCommand.java b/tajo-cli/src/main/java/org/apache/tajo/cli/tsql/commands/DescTableCommand.java index b7d9334f42..a3960e61da 100644 --- a/tajo-cli/src/main/java/org/apache/tajo/cli/tsql/commands/DescTableCommand.java +++ b/tajo-cli/src/main/java/org/apache/tajo/cli/tsql/commands/DescTableCommand.java @@ -114,10 +114,7 @@ protected String toFormattedString(TableDesc desc) { for(int i = 0; i < desc.getSchema().size(); i++) { Column col = desc.getSchema().getColumn(i); - sb.append(col.getSimpleName()).append("\t").append(col.getDataType().getType()); - if (col.getDataType().hasLength()) { - sb.append("(").append(col.getDataType().getLength()).append(")"); - } + sb.append(col.getSimpleName()).append("\t").append(col.getTypeDesc()); sb.append("\n"); } diff --git a/tajo-core/src/test/java/org/apache/tajo/cli/tools/TestTajoDump.java b/tajo-core/src/test/java/org/apache/tajo/cli/tools/TestTajoDump.java index d6631f6c59..5819bd2dd3 100644 --- a/tajo-core/src/test/java/org/apache/tajo/cli/tools/TestTajoDump.java +++ b/tajo-core/src/test/java/org/apache/tajo/cli/tools/TestTajoDump.java @@ -41,6 +41,27 @@ public void testDump1() throws Exception { printWriter.close(); assertStrings(new String(bos.toByteArray())); bos.close(); + + executeString("DROP TABLE \"" + getCurrentDatabase() + "\".\"TableName1\""); + } + } + + @Test + public void testDump2() throws Exception { + if (!testingCluster.isHCatalogStoreRunning()) { + executeString("CREATE TABLE \"" + getCurrentDatabase() + + "\".\"TableName2\" (\"Age\" int, \"Name\" Record (\"FirstName\" TEXT, lastname TEXT))"); + + UserRoleInfo userInfo = UserRoleInfo.getCurrentUser(); + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + PrintWriter printWriter = new PrintWriter(bos); + TajoDump.dump(client, userInfo, getCurrentDatabase(), false, false, false, printWriter); + printWriter.flush(); + printWriter.close(); + assertStrings(new String(bos.toByteArray())); + bos.close(); + + executeString("DROP TABLE \"" + getCurrentDatabase() + "\".\"TableName2\""); } } } diff --git a/tajo-core/src/test/java/org/apache/tajo/cli/tsql/TestTajoCli.java b/tajo-core/src/test/java/org/apache/tajo/cli/tsql/TestTajoCli.java index d4a5a1f840..aee5a02556 100644 --- a/tajo-core/src/test/java/org/apache/tajo/cli/tsql/TestTajoCli.java +++ b/tajo-core/src/test/java/org/apache/tajo/cli/tsql/TestTajoCli.java @@ -205,17 +205,7 @@ public void testConnectDatabase() throws Exception { assertEquals(databaseName, tajoCli.getContext().getCurrentDatabase()); } - @Test - public void testDescTable() throws Exception { - String tableName; - if (cluster.isHCatalogStoreRunning()) { - tableName = "TEST_DESC_TABLE".toLowerCase(); - } else { - tableName = "TEST_DESC_TABLE"; - } - - String sql = "create table \"" + tableName + "\" (col1 int4, col2 int4);"; - + private void verifyDescTable(String sql, String tableName, String resultFileName) throws Exception { setVar(tajoCli, SessionVars.CLI_FORMATTER_CLASS, TajoCliOutputTestFormatter.class.getName()); tajoCli.executeScript(sql); @@ -226,11 +216,37 @@ public void testDescTable() throws Exception { FileSystem fs = FileSystem.get(testBase.getTestingCluster().getConfiguration()); if (!cluster.isHCatalogStoreRunning()) { - assertOutputResult("testDescTable.result", consoleResult, new String[]{"${table.path}"}, + assertOutputResult(resultFileName, consoleResult, new String[]{"${table.path}"}, new String[]{fs.getUri() + "/tajo/warehouse/default/" + tableName}); } } + @Test + public void testDescTable() throws Exception { + String tableName; + if (cluster.isHCatalogStoreRunning()) { + tableName = "TEST_DESC_TABLE".toLowerCase(); + } else { + tableName = "TEST_DESC_TABLE"; + } + + String sql = "create table \"" + tableName + "\" (col1 int4, col2 int4);"; + verifyDescTable(sql, tableName, "testDescTable.result"); + } + + @Test + public void testDescTableForNestedSchema() throws Exception { + String tableName; + if (cluster.isHCatalogStoreRunning()) { + tableName = "TEST_DESC_TABLE_NESTED".toLowerCase(); + } else { + tableName = "TEST_DESC_TABLE_NESTED"; + } + + String sql = "create table \"" + tableName + "\" (col1 int4, col2 int4, col3 record (col4 record (col5 text)));"; + verifyDescTable(sql, tableName, "testDescTableForNestedSchema.result"); + } + @Test public void testSelectResultWithNullFalse() throws Exception { String sql = diff --git a/tajo-core/src/test/resources/results/TestTajoCli/testDescTableForNestedSchema.result b/tajo-core/src/test/resources/results/TestTajoCli/testDescTableForNestedSchema.result new file mode 100644 index 0000000000..83f360b6a0 --- /dev/null +++ b/tajo-core/src/test/resources/results/TestTajoCli/testDescTableForNestedSchema.result @@ -0,0 +1,29 @@ +OK + +table name: default.TEST_DESC_TABLE_NESTED +table path: ${table.path} +store type: CSV +number of rows: 0 +volume: 0 B +Options: + 'text.delimiter'='|' + +schema: +col1 INT4 +col2 INT4 +col3 RECORD (col4 RECORD (col5 TEXT)) + + + +table name: default.TEST_DESC_TABLE_NESTED +table path: ${table.path} +store type: CSV +number of rows: 0 +volume: 0 B +Options: + 'text.delimiter'='|' + +schema: +col1 INT4 +col2 INT4 +col3 RECORD (col4 RECORD (col5 TEXT)) \ No newline at end of file diff --git a/tajo-core/src/test/resources/results/TestTajoDump/testDump2.result b/tajo-core/src/test/resources/results/TestTajoDump/testDump2.result new file mode 100644 index 0000000000..6c15e3eace --- /dev/null +++ b/tajo-core/src/test/resources/results/TestTajoDump/testDump2.result @@ -0,0 +1,16 @@ +-- +-- Tajo database dump +-- + + +-- +-- Database name: "TestTajoDump" +-- + +CREATE DATABASE IF NOT EXISTS "TestTajoDump"; + +-- +-- Name: "TestTajoDump"."TableName2"; Type: TABLE; Storage: CSV +-- +CREATE TABLE "TestTajoDump"."TableName2" ("Age" INT4, "Name" RECORD ("FirstName" TEXT, lastname TEXT)) USING CSV WITH ('text.delimiter'='|'); + From b271d4d8946a6db26a25c83db1c8893d4ee24746 Mon Sep 17 00:00:00 2001 From: Hyunsik Choi Date: Thu, 12 Mar 2015 12:17:44 -0700 Subject: [PATCH 10/12] Update schema in xml files. --- .../src/main/resources/schemas/oracle/oracle.xml | 3 ++- .../src/main/resources/schemas/postgresql/postgresql.xml | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/tajo-catalog/tajo-catalog-server/src/main/resources/schemas/oracle/oracle.xml b/tajo-catalog/tajo-catalog-server/src/main/resources/schemas/oracle/oracle.xml index 8945fcad83..946d47cb21 100644 --- a/tajo-catalog/tajo-catalog-server/src/main/resources/schemas/oracle/oracle.xml +++ b/tajo-catalog/tajo-catalog-server/src/main/resources/schemas/oracle/oracle.xml @@ -117,9 +117,10 @@ TID INT NOT NULL, COLUMN_NAME VARCHAR2(255) NOT NULL, ORDINAL_POSITION INT NOT NULL, + NESTED_FIELD_NUM INT NOT NULL, DATA_TYPE CHAR(16), TYPE_LENGTH INTEGER, - CONSTRAINT COLUMNS_PKEY PRIMARY KEY (TID, COLUMN_NAME), + CONSTRAINT COLUMNS_PKEY PRIMARY KEY (TID, ORDINAL_POSITION), FOREIGN KEY (TID) REFERENCES TABLES (TID) ON DELETE CASCADE )]]> diff --git a/tajo-catalog/tajo-catalog-server/src/main/resources/schemas/postgresql/postgresql.xml b/tajo-catalog/tajo-catalog-server/src/main/resources/schemas/postgresql/postgresql.xml index 8e5cbccf56..65dfae5805 100644 --- a/tajo-catalog/tajo-catalog-server/src/main/resources/schemas/postgresql/postgresql.xml +++ b/tajo-catalog/tajo-catalog-server/src/main/resources/schemas/postgresql/postgresql.xml @@ -76,9 +76,10 @@ xsi:schemaLocation="http://tajo.apache.org/catalogstore ../DBMSSchemaDefinition. TID INT NOT NULL, COLUMN_NAME VARCHAR(255) NOT NULL, ORDINAL_POSITION INT NOT NULL, + NESTED_FIELD_NUM INT NOT NULL, DATA_TYPE CHAR(16), TYPE_LENGTH INTEGER, - CONSTRAINT COLUMNS_PKEY PRIMARY KEY (TID, COLUMN_NAME), + CONSTRAINT COLUMNS_PKEY PRIMARY KEY (TID, ORDINAL_POSITION), FOREIGN KEY (TID) REFERENCES TABLES (TID) ON DELETE CASCADE )]]> From cfa95ebd5bbae96d3f0b593283e4ca81518ee31b Mon Sep 17 00:00:00 2001 From: Hyunsik Choi Date: Thu, 12 Mar 2015 12:28:43 -0700 Subject: [PATCH 11/12] Updated catalog store driver versions. --- .../org/apache/tajo/catalog/store/MariaDBStore.java | 5 +++-- .../org/apache/tajo/catalog/store/MySQLStore.java | 11 ++++++----- .../org/apache/tajo/catalog/store/OracleStore.java | 2 +- .../apache/tajo/catalog/store/PostgreSQLStore.java | 4 ++-- .../src/main/resources/schemas/derby/derby.xml | 7 ++++++- .../src/main/resources/schemas/oracle/oracle.xml | 7 ++++++- .../main/resources/schemas/postgresql/postgresql.xml | 7 ++++++- 7 files changed, 30 insertions(+), 13 deletions(-) diff --git a/tajo-catalog/tajo-catalog-server/src/main/java/org/apache/tajo/catalog/store/MariaDBStore.java b/tajo-catalog/tajo-catalog-server/src/main/java/org/apache/tajo/catalog/store/MariaDBStore.java index 8cb385842f..0159b316d7 100644 --- a/tajo-catalog/tajo-catalog-server/src/main/java/org/apache/tajo/catalog/store/MariaDBStore.java +++ b/tajo-catalog/tajo-catalog-server/src/main/java/org/apache/tajo/catalog/store/MariaDBStore.java @@ -26,8 +26,9 @@ import org.apache.tajo.exception.InternalException; public class MariaDBStore extends AbstractMySQLMariaDBStore { - /** 2014-06-09: First versioning */ - private static final int MARIADB_CATALOG_STORE_VERSION = 2; + /** 3 - 2015-03-12: Nested Schema (TAJO-1329) */ + /** 2 - First versioning */ + private static final int MARIADB_CATALOG_STORE_VERSION = 3; private static final String CATALOG_DRIVER = "org.mariadb.jdbc.Driver"; diff --git a/tajo-catalog/tajo-catalog-server/src/main/java/org/apache/tajo/catalog/store/MySQLStore.java b/tajo-catalog/tajo-catalog-server/src/main/java/org/apache/tajo/catalog/store/MySQLStore.java index cedc0fe84d..f7f1b1b45a 100644 --- a/tajo-catalog/tajo-catalog-server/src/main/java/org/apache/tajo/catalog/store/MySQLStore.java +++ b/tajo-catalog/tajo-catalog-server/src/main/java/org/apache/tajo/catalog/store/MySQLStore.java @@ -26,10 +26,11 @@ import org.apache.tajo.exception.InternalException; public class MySQLStore extends AbstractMySQLMariaDBStore { - /** 2014-03-20: First versioning */ - private static final int MYSQL_CATALOG_STORE_VERSION_2 = 2; - /** Before 2013-03-20 */ - private static final int MYSQL_CATALOG_STORE_VERSION_1 = 1; + + /** 3 - 2015-03-12: Nested Schema (TAJO-1329) */ + /** 2 - 2014-06-09: First versioning */ + /** 1- Before 2013-03-20 */ + private static final int MYSQL_CATALOG_STORE_VERSION = 3; private static final String CATALOG_DRIVER = "com.mysql.jdbc.Driver"; @Override @@ -43,7 +44,7 @@ public MySQLStore(final Configuration conf) throws InternalException { @Override public int getDriverVersion() { - return MYSQL_CATALOG_STORE_VERSION_2; + return MYSQL_CATALOG_STORE_VERSION; } @Override diff --git a/tajo-catalog/tajo-catalog-server/src/main/java/org/apache/tajo/catalog/store/OracleStore.java b/tajo-catalog/tajo-catalog-server/src/main/java/org/apache/tajo/catalog/store/OracleStore.java index 45c153c97a..4b7e6a3a7b 100644 --- a/tajo-catalog/tajo-catalog-server/src/main/java/org/apache/tajo/catalog/store/OracleStore.java +++ b/tajo-catalog/tajo-catalog-server/src/main/java/org/apache/tajo/catalog/store/OracleStore.java @@ -27,7 +27,7 @@ import org.apache.tajo.exception.InternalException; public class OracleStore extends AbstractDBStore { - + private static final String CATALOG_DRIVER = "oracle.jdbc.OracleDriver"; public OracleStore(Configuration conf) throws InternalException { diff --git a/tajo-catalog/tajo-catalog-server/src/main/java/org/apache/tajo/catalog/store/PostgreSQLStore.java b/tajo-catalog/tajo-catalog-server/src/main/java/org/apache/tajo/catalog/store/PostgreSQLStore.java index 41f2909d98..6089fdd6fb 100644 --- a/tajo-catalog/tajo-catalog-server/src/main/java/org/apache/tajo/catalog/store/PostgreSQLStore.java +++ b/tajo-catalog/tajo-catalog-server/src/main/java/org/apache/tajo/catalog/store/PostgreSQLStore.java @@ -27,7 +27,7 @@ import org.apache.tajo.exception.InternalException; public class PostgreSQLStore extends AbstractDBStore { - + private static final String CATALOG_DRIVER = "org.postgresql.Driver"; public PostgreSQLStore(Configuration conf) throws InternalException { @@ -48,7 +48,7 @@ protected String getCatalogSchemaPath() { protected Connection createConnection(Configuration conf) throws SQLException { return DriverManager.getConnection(getCatalogUri(), this.connectionId, this.connectionPassword); } - + @Override protected void createDatabaseDependants() throws CatalogException { diff --git a/tajo-catalog/tajo-catalog-server/src/main/resources/schemas/derby/derby.xml b/tajo-catalog/tajo-catalog-server/src/main/resources/schemas/derby/derby.xml index 5ac6394be2..a0bd9cda61 100644 --- a/tajo-catalog/tajo-catalog-server/src/main/resources/schemas/derby/derby.xml +++ b/tajo-catalog/tajo-catalog-server/src/main/resources/schemas/derby/derby.xml @@ -17,7 +17,12 @@ limitations under the License. --> - + + + + + + diff --git a/tajo-catalog/tajo-catalog-server/src/main/resources/schemas/oracle/oracle.xml b/tajo-catalog/tajo-catalog-server/src/main/resources/schemas/oracle/oracle.xml index 946d47cb21..880a14ece8 100644 --- a/tajo-catalog/tajo-catalog-server/src/main/resources/schemas/oracle/oracle.xml +++ b/tajo-catalog/tajo-catalog-server/src/main/resources/schemas/oracle/oracle.xml @@ -17,7 +17,12 @@ limitations under the License. --> - + + + + + + - + + + + + + From f3cf891791fef92d2872746c22f9f64c51df64ba Mon Sep 17 00:00:00 2001 From: Hyunsik Choi Date: Thu, 12 Mar 2015 18:27:58 -0700 Subject: [PATCH 12/12] Fixed the bug about the duplicated names in different level. --- .../java/org/apache/tajo/catalog/Schema.java | 5 +- .../org/apache/tajo/catalog/TestSchema.java | 31 ++++++++++ tajo-core/pom.xml | 56 +++++++++++++++++++ .../tajo/engine/query/TestCreateTable.java | 16 +++++- .../TestCreateTable/testNestedRecord1.sql | 1 + .../TestCreateTable/testNestedRecord2.sql | 1 + 6 files changed, 107 insertions(+), 3 deletions(-) create mode 100644 tajo-core/src/test/resources/queries/TestCreateTable/testNestedRecord1.sql create mode 100644 tajo-core/src/test/resources/queries/TestCreateTable/testNestedRecord2.sql diff --git a/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/Schema.java b/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/Schema.java index 5e903b0a03..ed2cd2c651 100644 --- a/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/Schema.java +++ b/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/Schema.java @@ -85,8 +85,11 @@ private static void deserializeColumn(List tobeAdded, List int childStartIndex = tobeAdded.size() - childNum; // Extract nested fields List nestedColumns = TUtil.newList(tobeAdded.subList(childStartIndex, childStartIndex + childNum)); + // Remove nested fields from the the current level - tobeAdded.removeAll(nestedColumns); + for (int i = 0; i < childNum; i++) { + tobeAdded.remove(tobeAdded.size() - 1); + } // Add the nested fields to the list as a single record column tobeAdded.add(new Column(columnProto.getName(), new TypeDesc(new Schema(nestedColumns)))); diff --git a/tajo-catalog/tajo-catalog-common/src/test/java/org/apache/tajo/catalog/TestSchema.java b/tajo-catalog/tajo-catalog-common/src/test/java/org/apache/tajo/catalog/TestSchema.java index 01c31241e2..edd0f3e5b1 100644 --- a/tajo-catalog/tajo-catalog-common/src/test/java/org/apache/tajo/catalog/TestSchema.java +++ b/tajo-catalog/tajo-catalog-common/src/test/java/org/apache/tajo/catalog/TestSchema.java @@ -260,6 +260,37 @@ public void testNestedRecord2() { verifySchema(nestedSchema2); } + @Test + public void testNestedRecord3() { + verifySchema(nestedSchema3); + } + + @Test + public void testNestedRecord4() { + Schema root = new Schema(); + + Schema nf2DotNf1 = new Schema(); + nf2DotNf1.addColumn("f1", Type.INT8); + nf2DotNf1.addColumn("f2", Type.INT8); + + Schema nf2DotNf2 = new Schema(); + nf2DotNf2.addColumn("f1", Type.INT8); + nf2DotNf2.addColumn("f2", Type.INT8); + + Schema nf2 = new Schema(); + nf2.addColumn("f1", Type.INT8); + nf2.addColumn("nf1", new TypeDesc(nf2DotNf1)); + nf2.addColumn("nf2", new TypeDesc(nf2DotNf2)); + nf2.addColumn("f2", Type.INT8); + + root.addColumn("f1", Type.INT8); + root.addColumn("nf1", Type.INT8); + root.addColumn("nf2", new TypeDesc(nf2)); + root.addColumn("f2", Type.INT8); + + verifySchema(root); + } + public static void verifySchema(Schema s1) { assertEquals(s1, s1); diff --git a/tajo-core/pom.xml b/tajo-core/pom.xml index 743180fc0a..38bddecd17 100644 --- a/tajo-core/pom.xml +++ b/tajo-core/pom.xml @@ -492,6 +492,62 @@ + + + all-dependencies + + false + + + + + org.apache.hadoop + hadoop-common + + + + org.apache.hadoop + hadoop-hdfs + + + commons-el + commons-el + + + tomcat + jasper-runtime + + + tomcat + jasper-compiler + + + org.mortbay.jetty + jsp-2.1-jetty + + + com.sun.jersey.jersey-test-framework + jersey-test-framework-grizzly2 + + + + + + org.apache.hadoop + hadoop-yarn-api + + + org.apache.hadoop + hadoop-yarn-common + + + org.apache.hadoop + hadoop-yarn-server-common + + + + + hcatalog-0.12.0 diff --git a/tajo-core/src/test/java/org/apache/tajo/engine/query/TestCreateTable.java b/tajo-core/src/test/java/org/apache/tajo/engine/query/TestCreateTable.java index a9528da8e1..1fbe7c5825 100644 --- a/tajo-core/src/test/java/org/apache/tajo/engine/query/TestCreateTable.java +++ b/tajo-core/src/test/java/org/apache/tajo/engine/query/TestCreateTable.java @@ -602,14 +602,26 @@ public final void testCreateTableLike1() throws Exception { } @Test - public final void testCreateTableWithNestedRecord() throws Exception { + public final void testNestedRecord1() throws Exception { executeString("CREATE DATABASE D9;").close(); assertTableNotExists("d9.nested_table"); - executeString("CREATE TABLE D9.nested_table (f1 int, nested_field record (f2 int4, f3 int8), f3 text);").close(); + executeQuery().close(); assertTableExists("d9.nested_table"); executeString("DROP TABLE D9.nested_table"); executeString("DROP DATABASE D9").close(); } + + @Test + public final void testNestedRecord2() throws Exception { + executeString("CREATE DATABASE D9;").close(); + + assertTableNotExists("d9.nested_table2"); + executeQuery(); + assertTableExists("d9.nested_table2"); + + executeString("DROP TABLE D9.nested_table2"); + executeString("DROP DATABASE D9").close(); + } } diff --git a/tajo-core/src/test/resources/queries/TestCreateTable/testNestedRecord1.sql b/tajo-core/src/test/resources/queries/TestCreateTable/testNestedRecord1.sql new file mode 100644 index 0000000000..d14736151d --- /dev/null +++ b/tajo-core/src/test/resources/queries/TestCreateTable/testNestedRecord1.sql @@ -0,0 +1 @@ +CREATE TABLE D9.nested_table (f1 int, nested_field record (f2 int4, f3 int8), f3 text); \ No newline at end of file diff --git a/tajo-core/src/test/resources/queries/TestCreateTable/testNestedRecord2.sql b/tajo-core/src/test/resources/queries/TestCreateTable/testNestedRecord2.sql new file mode 100644 index 0000000000..f794d21470 --- /dev/null +++ b/tajo-core/src/test/resources/queries/TestCreateTable/testNestedRecord2.sql @@ -0,0 +1 @@ +CREATE TABLE D9.nested_table2 (f1 int, nf1 record (f1 int4, f3 double), nf2 record (f1 int4, nf1 record (f1 int4, f2 text), nf2 record (f1 int4, f2 text), f2 double), f2 text); \ No newline at end of file