Skip to content

Commit

Permalink
DRILL-7310: Move schema-related classes from exec module to be able to use them in metastore module
Browse files Browse the repository at this point in the history

closes #1816
  • Loading branch information
vvysotskyi authored and arina-ielchiieva committed Jun 29, 2019
1 parent e8d9b0a commit f3d6b69
Show file tree
Hide file tree
Showing 78 changed files with 675 additions and 485 deletions.
21 changes: 0 additions & 21 deletions exec/java-exec/pom.xml
Expand Up @@ -547,10 +547,6 @@
<groupId>sqlline</groupId>
<artifactId>sqlline</artifactId>
</dependency>
<dependency>
<groupId>org.antlr</groupId>
<artifactId>antlr4-runtime</artifactId>
</dependency>
</dependencies>

<profiles>
Expand Down Expand Up @@ -800,23 +796,6 @@
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
</plugin>
<plugin>
<groupId>org.antlr</groupId>
<artifactId>antlr4-maven-plugin</artifactId>
<version>${antlr.version}</version>
<configuration>
<listener>false</listener>
<visitor>true</visitor>
<outputDirectory>${project.build.directory}/generated-sources</outputDirectory>
</configuration>
<executions>
<execution>
<goals>
<goal>antlr4</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
<pluginManagement>
<plugins>
Expand Down
Expand Up @@ -119,8 +119,8 @@ public ColumnState buildColumn(ContainerState parent, ColumnMetadata columnSchem
* a vector, a writer, and the column state which binds the two together
* and manages the column.
*
* @param columnSchema schema of the new primitive column
* @param projType implied projection type for the column
* @param parent schema of the new primitive column
* @param colProj implied projection type for the column
* @return column state for the new column
*/

Expand Down Expand Up @@ -179,7 +179,8 @@ private ColumnState buildPrimitive(ContainerState parent, ColumnReadProjection c
* here, instead we create a tuple state to hold the columns, and defer the
* map vector (or vector container) until harvest time.
*
* @param columnSchema description of the map column
* @param parent description of the map column
* @param colProj implied projection type for the column
* @return column state for the map column
*/

Expand Down Expand Up @@ -295,7 +296,7 @@ private ColumnState buildMapArray(ContainerState parent, ColumnReadProjection co
* does so. Unions are fully tested in the row set writer mechanism.
*
* @param parent container
* @param columnSchema column schema
* @param colProj column schema
* @return column
*/
private ColumnState buildUnion(ContainerState parent, ColumnReadProjection colProj) {
Expand Down Expand Up @@ -362,7 +363,7 @@ private ColumnState buildList(ContainerState parent, ColumnReadProjection colPro
* not support the <tt>ListVector</tt> type.
*
* @param parent the parent (tuple, union or list) that holds this list
* @param columnSchema metadata description of the list which must contain
* @param colProj metadata description of the list which must contain
* exactly one subtype
* @return the column state for the list
*/
Expand Down Expand Up @@ -421,7 +422,7 @@ private ColumnState buildSimpleList(ContainerState parent, ColumnReadProjection
* not support the <tt>ListVector</tt> type.
*
* @param parent the parent (tuple, union or list) that holds this list
* @param columnSchema metadata description of the list (must be empty of
* @param colProj metadata description of the list (must be empty of
* subtypes)
* @return the column state for the list
*/
Expand Down
@@ -0,0 +1,57 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.drill.exec.record;

import org.apache.drill.exec.record.metadata.SchemaBuilder;

/**
 * Fluent builder for {@link BatchSchema} instances. Wraps a
 * {@link SchemaBuilder} (which assembles the column list) and adds the
 * batch-level selection-vector mode, which defaults to
 * {@link BatchSchema.SelectionVectorMode#NONE}.
 */
public class BatchSchemaBuilder {
  private BatchSchema.SelectionVectorMode svMode = BatchSchema.SelectionVectorMode.NONE;
  private SchemaBuilder schemaBuilder;

  public BatchSchemaBuilder() {
  }

  /**
   * Create a new schema starting with the base schema. Allows appending
   * additional columns to the base schema.
   *
   * @param baseSchema schema whose fields seed the underlying
   *        {@link SchemaBuilder}
   */
  public BatchSchemaBuilder(BatchSchema baseSchema) {
    schemaBuilder = new SchemaBuilder();
    for (MaterializedField field : baseSchema) {
      schemaBuilder.add(field);
    }
  }

  /**
   * Sets the selection-vector mode for the resulting batch schema.
   *
   * @param svMode selection-vector mode to use
   * @return this builder, for call chaining
   */
  public BatchSchemaBuilder withSVMode(BatchSchema.SelectionVectorMode svMode) {
    this.svMode = svMode;
    return this;
  }

  /**
   * Supplies the column-level builder whose fields will make up the
   * resulting batch schema.
   *
   * @param schemaBuilder builder holding the column definitions
   * @return this builder, for call chaining
   */
  public BatchSchemaBuilder withSchemaBuilder(SchemaBuilder schemaBuilder) {
    this.schemaBuilder = schemaBuilder;
    return this;
  }

  /**
   * @return the underlying column-level builder, or {@code null} if none
   *         has been supplied yet
   */
  public SchemaBuilder schemaBuilder() {
    return schemaBuilder;
  }

  /**
   * Builds the {@link BatchSchema} from the accumulated columns and
   * selection-vector mode.
   *
   * @return the completed batch schema
   * @throws IllegalStateException if no {@link SchemaBuilder} was supplied
   *         via the constructor or {@link #withSchemaBuilder(SchemaBuilder)}
   */
  public BatchSchema build() {
    // Fail fast with a descriptive message rather than an opaque NPE when
    // the no-arg constructor was used and withSchemaBuilder() never called.
    if (schemaBuilder == null) {
      throw new IllegalStateException(
          "Schema builder is not set: use the BatchSchema constructor or call withSchemaBuilder()");
    }
    return new BatchSchema(svMode, schemaBuilder.buildSchema().toFieldList());
  }
}
Expand Up @@ -29,6 +29,9 @@
import org.apache.drill.exec.expr.TypeHelper;
import org.apache.drill.exec.memory.BufferAllocator;
import org.apache.drill.exec.ops.OperatorContext;
import org.apache.drill.exec.record.metadata.MetadataUtils;
import org.apache.drill.exec.record.metadata.TupleMetadata;
import org.apache.drill.exec.record.metadata.TupleSchema;
import org.apache.drill.exec.vector.ValueVector;
import org.apache.drill.exec.vector.complex.UnionVector;

Expand Down Expand Up @@ -176,4 +179,12 @@ public static VectorContainer coerceContainer(VectorAccessible in, BatchSchema t
Preconditions.checkState(vectorMap.size() == 0, "Leftover vector from incoming batch");
return c;
}

/**
 * Converts a {@link BatchSchema} into its {@link TupleMetadata}
 * representation by translating each materialized field into column
 * metadata via {@link MetadataUtils#fromView(MaterializedField)}.
 *
 * @param batchSchema batch schema to convert
 * @return tuple metadata describing the same columns
 */
public static TupleMetadata fromBatchSchema(BatchSchema batchSchema) {
  TupleSchema schema = new TupleSchema();
  batchSchema.forEach(field -> schema.add(MetadataUtils.fromView(field)));
  return schema;
}
}
Expand Up @@ -26,7 +26,6 @@
import org.apache.drill.metastore.statistics.TableStatisticsKind;
import org.apache.drill.metastore.statistics.Statistic;
import org.apache.drill.exec.planner.common.DrillStatsTable;
import org.apache.drill.exec.record.metadata.MetadataUtils;
import org.apache.drill.exec.record.metadata.TupleMetadata;
import org.apache.drill.metastore.metadata.BaseMetadata;
import org.apache.drill.metastore.metadata.NonInterestingColumnsMetadata;
Expand Down Expand Up @@ -212,12 +211,12 @@ public TableMetadata getTableMetadata() {

if (this.schema == null) {
schema = new TupleSchema();
fields.forEach((schemaPath, majorType) -> MetadataUtils.addColumnMetadata(schema, schemaPath, majorType));
fields.forEach((schemaPath, majorType) -> SchemaPathUtils.addColumnMetadata(schema, schemaPath, majorType));
} else {
// merges specified schema with schema from table
fields.forEach((schemaPath, majorType) -> {
if (SchemaPathUtils.getColumnMetadata(schemaPath, schema) == null) {
MetadataUtils.addColumnMetadata(schema, schemaPath, majorType);
SchemaPathUtils.addColumnMetadata(schema, schemaPath, majorType);
}
});
}
Expand Down
Expand Up @@ -25,7 +25,6 @@
import org.apache.drill.metastore.metadata.TableInfo;
import org.apache.drill.metastore.statistics.TableStatisticsKind;
import org.apache.drill.metastore.statistics.Statistic;
import org.apache.drill.exec.record.metadata.MetadataUtils;
import org.apache.drill.exec.record.metadata.TupleMetadata;
import org.apache.drill.exec.record.metadata.TupleSchema;
import org.apache.drill.exec.resolver.TypeCastRules;
Expand All @@ -42,6 +41,7 @@
import org.apache.drill.metastore.metadata.PartitionMetadata;
import org.apache.drill.metastore.metadata.RowGroupMetadata;
import org.apache.drill.metastore.statistics.StatisticsHolder;
import org.apache.drill.metastore.util.SchemaPathUtils;
import org.apache.drill.metastore.util.TableMetadataUtils;
import org.apache.drill.metastore.statistics.ExactStatisticsConstants;
import org.apache.drill.exec.expr.StatisticsProvider;
Expand Down Expand Up @@ -149,7 +149,7 @@ public static RowGroupMetadata getRowGroupMetadata(MetadataBase.ParquetTableMeta
Map<SchemaPath, TypeProtos.MajorType> columns = getRowGroupFields(tableMetadata, rowGroupMetadata);

TupleSchema schema = new TupleSchema();
columns.forEach((schemaPath, majorType) -> MetadataUtils.addColumnMetadata(schema, schemaPath, majorType));
columns.forEach((schemaPath, majorType) -> SchemaPathUtils.addColumnMetadata(schema, schemaPath, majorType));

MetadataInfo metadataInfo = new MetadataInfo(MetadataType.ROW_GROUP, MetadataInfo.GENERAL_INFO_KEY, null);

Expand Down
Expand Up @@ -33,6 +33,7 @@
import org.apache.drill.common.types.TypeProtos;
import org.apache.drill.exec.planner.physical.PlannerSettings;
import org.apache.drill.exec.record.BatchSchema;
import org.apache.drill.exec.record.BatchSchemaBuilder;
import org.apache.drill.exec.record.metadata.SchemaBuilder;
import org.apache.drill.test.BaseTestQuery;
import org.hamcrest.CoreMatchers;
Expand Down Expand Up @@ -998,8 +999,10 @@ public void testBooleanConditionsMode() throws Exception {
"isdate(employee_id)",
"NOT (employee_id IS NULL)");

BatchSchema expectedSchema = new SchemaBuilder()
.add("col1", TypeProtos.MinorType.BIT)
SchemaBuilder schemaBuilder = new SchemaBuilder()
.add("col1", TypeProtos.MinorType.BIT);
BatchSchema expectedSchema = new BatchSchemaBuilder()
.withSchemaBuilder(schemaBuilder)
.build();

for (String condition : conditions) {
Expand Down
19 changes: 12 additions & 7 deletions exec/java-exec/src/test/java/org/apache/drill/TestStarQueries.java
Expand Up @@ -23,6 +23,7 @@
import org.apache.drill.common.exceptions.UserException;
import org.apache.drill.common.types.TypeProtos;
import org.apache.drill.exec.record.BatchSchema;
import org.apache.drill.exec.record.BatchSchemaBuilder;
import org.apache.drill.exec.record.metadata.SchemaBuilder;
import org.apache.drill.test.BaseTestQuery;
import org.junit.BeforeClass;
Expand Down Expand Up @@ -524,11 +525,13 @@ public void testCaseSenJoinCTEWithRegTab() throws Exception {
@Test // DRILL-5845
public void testSchemaForStarOrderByLimit() throws Exception {
final String query = "select * from cp.`tpch/nation.parquet` order by n_name limit 1";
final BatchSchema expectedSchema = new SchemaBuilder()
SchemaBuilder schemaBuilder = new SchemaBuilder()
.add("n_nationkey", TypeProtos.MinorType.INT)
.add("n_name",TypeProtos.MinorType.VARCHAR)
.add("n_name", TypeProtos.MinorType.VARCHAR)
.add("n_regionkey", TypeProtos.MinorType.INT)
.add("n_comment", TypeProtos.MinorType.VARCHAR)
.add("n_comment", TypeProtos.MinorType.VARCHAR);
BatchSchema expectedSchema = new BatchSchemaBuilder()
.withSchemaBuilder(schemaBuilder)
.build();

testBuilder()
Expand All @@ -540,11 +543,13 @@ public void testSchemaForStarOrderByLimit() throws Exception {

@Test // DRILL-5822
public void testSchemaForParallelizedStarOrderBy() throws Exception {
final String query = "select * from cp.`tpch/region.parquet` order by r_name";
final BatchSchema expectedSchema = new SchemaBuilder()
String query = "select * from cp.`tpch/region.parquet` order by r_name";
SchemaBuilder schemaBuilder = new SchemaBuilder()
.add("r_regionkey", TypeProtos.MinorType.INT)
.add("r_name",TypeProtos.MinorType.VARCHAR)
.add("r_comment", TypeProtos.MinorType.VARCHAR)
.add("r_name", TypeProtos.MinorType.VARCHAR)
.add("r_comment", TypeProtos.MinorType.VARCHAR);
BatchSchema expectedSchema = new BatchSchemaBuilder()
.withSchemaBuilder(schemaBuilder)
.build();

testBuilder()
Expand Down
Expand Up @@ -17,6 +17,7 @@
*/
package org.apache.drill;

import org.apache.drill.exec.record.BatchSchemaBuilder;
import org.apache.drill.shaded.guava.com.google.common.collect.Lists;

import org.apache.commons.lang3.tuple.Pair;
Expand Down Expand Up @@ -1250,8 +1251,10 @@ public void testUnionAllLeftEmptyDir() throws Exception {

@Test
public void testUnionAllBothEmptyDirs() throws Exception {
final BatchSchema expectedSchema = new SchemaBuilder()
.addNullable("key", TypeProtos.MinorType.INT)
SchemaBuilder schemaBuilder = new SchemaBuilder()
.addNullable("key", TypeProtos.MinorType.INT);
BatchSchema expectedSchema = new BatchSchemaBuilder()
.withSchemaBuilder(schemaBuilder)
.build();

testBuilder()
Expand Down
Expand Up @@ -17,6 +17,7 @@
*/
package org.apache.drill;

import org.apache.drill.exec.record.BatchSchemaBuilder;
import org.apache.drill.shaded.guava.com.google.common.collect.Lists;

import org.apache.commons.io.FileUtils;
Expand Down Expand Up @@ -826,8 +827,10 @@ public void testUnionLeftEmptyDir() throws Exception {

@Test
public void testUnionBothEmptyDirs() throws Exception {
final BatchSchema expectedSchema = new SchemaBuilder()
.addNullable("key", TypeProtos.MinorType.INT)
SchemaBuilder schemaBuilder = new SchemaBuilder()
.addNullable("key", TypeProtos.MinorType.INT);
final BatchSchema expectedSchema = new BatchSchemaBuilder()
.withSchemaBuilder(schemaBuilder)
.build();

testBuilder()
Expand Down
19 changes: 13 additions & 6 deletions exec/java-exec/src/test/java/org/apache/drill/TestUntypedNull.java
Expand Up @@ -22,6 +22,7 @@
import org.apache.drill.common.types.Types;
import org.apache.drill.exec.ExecConstants;
import org.apache.drill.exec.record.BatchSchema;
import org.apache.drill.exec.record.BatchSchemaBuilder;
import org.apache.drill.exec.record.metadata.SchemaBuilder;
import org.apache.drill.test.ClusterFixture;
import org.apache.drill.test.ClusterFixtureBuilder;
Expand Down Expand Up @@ -115,8 +116,10 @@ public void testTypeAndMode() throws Exception {
@Test
public void testCoalesceOnNotExistentColumns() throws Exception {
String query = "select coalesce(unk1, unk2) as coal from cp.`tpch/nation.parquet` limit 5";
BatchSchema expectedSchema = new SchemaBuilder()
.add("coal", UNTYPED_NULL_TYPE)
SchemaBuilder schemaBuilder = new SchemaBuilder()
.add("coal", UNTYPED_NULL_TYPE);
BatchSchema expectedSchema = new BatchSchemaBuilder()
.withSchemaBuilder(schemaBuilder)
.build();

testBuilder()
Expand All @@ -135,8 +138,10 @@ public void testCoalesceOnNotExistentColumns() throws Exception {
@Test
public void testCoalesceOnNotExistentColumnsWithGroupBy() throws Exception {
String query = "select coalesce(unk1, unk2) as coal from cp.`tpch/nation.parquet` group by 1";
BatchSchema expectedSchema = new SchemaBuilder()
.add("coal", UNTYPED_NULL_TYPE)
SchemaBuilder schemaBuilder = new SchemaBuilder()
.add("coal", UNTYPED_NULL_TYPE);
BatchSchema expectedSchema = new BatchSchemaBuilder()
.withSchemaBuilder(schemaBuilder)
.build();

testBuilder()
Expand All @@ -155,8 +160,10 @@ public void testCoalesceOnNotExistentColumnsWithGroupBy() throws Exception {
@Test
public void testCoalesceOnNotExistentColumnsWithOrderBy() throws Exception {
String query = "select coalesce(unk1, unk2) as coal from cp.`tpch/nation.parquet` order by 1 limit 5";
BatchSchema expectedSchema = new SchemaBuilder()
.add("coal", UNTYPED_NULL_TYPE)
SchemaBuilder schemaBuilder = new SchemaBuilder()
.add("coal", UNTYPED_NULL_TYPE);
BatchSchema expectedSchema = new BatchSchemaBuilder()
.withSchemaBuilder(schemaBuilder)
.build();

testBuilder()
Expand Down

0 comments on commit f3d6b69

Please sign in to comment.