From 6a50954faa95c3f09b837872cd63af41baed1fb4 Mon Sep 17 00:00:00 2001 From: Vitalii Diravka Date: Mon, 13 Jun 2016 11:44:33 +0000 Subject: [PATCH 1/2] DRILL-4682: Allow full schema identifier in SELECT clause - changed SqlBracketlessSyntax logic in the DrillCompoundIdentifier; - added checking of using full schema name in column names; - added unit test testColumnNamesWithSchemaName; --- .../parser/CompoundIdentifierConverter.java | 18 +++++- .../sql/parser/DrillCompoundIdentifier.java | 60 +++++++++++++++---- .../org/apache/drill/TestExampleQueries.java | 23 ++++++- 3 files changed, 87 insertions(+), 14 deletions(-) diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/parser/CompoundIdentifierConverter.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/parser/CompoundIdentifierConverter.java index fa0d3195813..eb9f3affca2 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/parser/CompoundIdentifierConverter.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/parser/CompoundIdentifierConverter.java @@ -19,10 +19,13 @@ import java.util.List; import java.util.Map; +import java.util.Set; +import com.google.common.collect.Sets; import org.apache.calcite.sql.SqlCall; import org.apache.calcite.sql.SqlIdentifier; import org.apache.calcite.sql.SqlJoin; +import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.SqlNode; import org.apache.calcite.sql.SqlOrderBy; import org.apache.calcite.sql.SqlSelect; @@ -43,13 +46,14 @@ public class CompoundIdentifierConverter extends SqlShuttle { // private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(CompoundIdentifierConverter.class); + private Set fullSchemasSet = Sets.newHashSet(); private boolean enableComplex = true; @Override public SqlNode visit(SqlIdentifier id) { if(id instanceof DrillCompoundIdentifier){ if(enableComplex){ - return ((DrillCompoundIdentifier) id).getAsSqlNode(); + return ((DrillCompoundIdentifier) id).getAsSqlNode(fullSchemasSet); }else{ return ((DrillCompoundIdentifier) id).getAsCompoundIdentifier(); } @@ -115,6 +119,18 @@ public SqlNode visitChild( enableComplex = true; } } + if (expr.getKind() == SqlKind.SELECT) { + if (((SqlSelect) expr).getFrom() instanceof DrillCompoundIdentifier) { + fullSchemasSet.add((DrillCompoundIdentifier) ((SqlSelect) expr).getFrom()); + } else if (((SqlSelect) expr).getFrom() instanceof SqlJoin) { + if (((SqlJoin) ((SqlSelect) expr).getFrom()).getLeft() instanceof DrillCompoundIdentifier){ + fullSchemasSet.add((DrillCompoundIdentifier) ((SqlJoin) ((SqlSelect) expr).getFrom()).getLeft()); + } + if (((SqlJoin) ((SqlSelect) expr).getFrom()).getRight() instanceof DrillCompoundIdentifier){ + fullSchemasSet.add((DrillCompoundIdentifier) ((SqlJoin) ((SqlSelect) expr).getFrom()).getRight()); + } + } + } SqlNode newOperand = operand.accept(CompoundIdentifierConverter.this); enableComplex = localEnableComplex; if (newOperand != operand) { diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/parser/DrillCompoundIdentifier.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/parser/DrillCompoundIdentifier.java index fe96be43712..b5ae267bc96 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/parser/DrillCompoundIdentifier.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/parser/DrillCompoundIdentifier.java @@ -19,6 +19,7 @@ import java.util.Collections; import java.util.List; +import java.util.Set; import org.apache.calcite.sql.SqlBasicCall; import org.apache.calcite.sql.SqlIdentifier; @@ -69,15 +70,15 @@ public void addIndex(int index, SqlParserPos pos){ } } - public SqlNode getAsSqlNode(){ - if(ids.size() == 1){ + public SqlNode getAsSqlNode(Set fullSchemasSet) { + if (ids.size() == 1) { return new SqlIdentifier(Collections.singletonList(ids.get(0).value), ids.get(0).parserPos); } int startIndex; SqlNode node; - if(ids.get(1).isArray()){ + if (ids.get(1).isArray()) { // handle everything post zero index as item operator. startIndex = 1; node = new SqlIdentifier( // @@ -85,15 +86,22 @@ public SqlNode getAsSqlNode(){ null, // ids.get(0).parserPos, // ImmutableList.of(ids.get(0).parserPos)); - }else{ - // handle everything post two index as item operator. - startIndex = 2; - node = new SqlIdentifier( // - ImmutableList.of(ids.get(0).value, ids.get(1).value), // - null, // - ids.get(0).parserPos, // - ImmutableList.of(ids.get(0).parserPos, ids.get(1).parserPos)); - + } else { + int fullSchemaNamesCount = getFullSchemaIdentifiersCountInColumnName(fullSchemasSet); + if (fullSchemaNamesCount != 0) { + // the case when full schema identifier is used in select clause + // handle everything post two index (except schema identifier) as item operator. + startIndex = fullSchemaNamesCount + 1; + node = getAsCompoundIdentifier(startIndex); + } else { + // handle everything post two index as item operator. + startIndex = 2; + node = new SqlIdentifier( // + ImmutableList.of(ids.get(0).value, ids.get(1).value), // + null, // + ids.get(0).parserPos, // + ImmutableList.of(ids.get(0).parserPos, ids.get(1).parserPos)); + } } for(int i = startIndex ; i < ids.size(); i++){ node = ids.get(i).getNode(node); @@ -114,6 +122,34 @@ public SqlNode getAsCompoundIdentifier(){ return new SqlIdentifier(names, null, pos.get(0), pos); } + private SqlNode getAsCompoundIdentifier(int numberOfIdentifiers) { + List names = Lists.newArrayListWithCapacity(ids.size()); + List pos = Lists.newArrayListWithCapacity(ids.size()); + for(int i =0; i < numberOfIdentifiers; i++){ + IdentifierHolder holder = ids.get(i); + names.add(holder.value); + pos.add(holder.parserPos); + } + return new SqlIdentifier( + ImmutableList.copyOf(names), + null, + ids.get(0).parserPos, + ImmutableList.copyOf(pos)); + } + + private int getFullSchemaIdentifiersCountInColumnName(Set fullSchemasSet) { + for (DrillCompoundIdentifier fullSchema : fullSchemasSet) { + if (fullSchema != null && ids.size() > fullSchema.ids.size()) { + List possibleFullSchemaNames = getNames(ids.subList(0, fullSchema.ids.size())); + List fullSchemaNames = getNames(fullSchema.ids); + if (possibleFullSchemaNames.equals(fullSchemaNames)) { + return fullSchema.ids.size(); + } + } + } + return 0; + } + private static class IdentifierHolder{ String value; SqlParserPos parserPos; diff --git a/exec/java-exec/src/test/java/org/apache/drill/TestExampleQueries.java b/exec/java-exec/src/test/java/org/apache/drill/TestExampleQueries.java index 97df2ee9082..6468186642c 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/TestExampleQueries.java +++ b/exec/java-exec/src/test/java/org/apache/drill/TestExampleQueries.java @@ -1195,4 +1195,25 @@ public void testDateImplicitCasting() throws Exception { .run(); } -} \ No newline at end of file + @Test // DRILL-4682 + public void testColumnNamesWithSchemaName() throws Exception { + test("SELECT cp.`employee.json`.department_id FROM cp.`employee.json` ORDER BY cp.`employee.json`.department_id"); + test("SELECT COUNT(cp.`employee.json`.department_id) FROM cp.`employee.json`"); + test("SELECT cp.`employee.json`.employee_id, cp.`employee.json`.full_name, cp.`department.json`.department_id " + + "FROM cp.`employee.json` LEFT JOIN cp.`department.json` " + + "ON cp.`employee.json`.department_id = cp.`department.json`.department_id " + + "WHERE cp.`department.json`.department_description = 'HQ Marketing'"); + test("SELECT cp.`employee.json`.employee_id, t.department_id " + + "FROM cp.`employee.json` LEFT JOIN (select * from cp.`department.json`) t " + + "ON cp.`employee.json`.department_id = t.department_id " + + "WHERE t.department_description = 'HQ Marketing'"); + + // TODO: Can be uncommented after resolving "DRILL-3993: Rebase Drill on Calcite master branch" + // Fixed in "CALCITE-881: Allow schema.table.column references in GROUP BY". + // test("SELECT cp.`employee.json`.employee_id FROM cp.`employee.json` GROUP BY cp.`employee.json`.employee_id"); + + // TODO: Can be uncommented after resolving "CALCITE-1323: Wrong prefix number in DelegatingScope.fullyQualify()" + // test("SELECT cp.`employee.json`.* FROM cp.`employee.json`"); + } + +} From 2c505a3c858b34663da1090d223365c392293fae Mon Sep 17 00:00:00 2001 From: Vitalii Diravka Date: Fri, 22 Jul 2016 18:40:29 +0000 Subject: [PATCH 2/2] =?UTF-8?q?Code=20changes=20ac=D1=81ording=20to=20the?= =?UTF-8?q?=20review?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../parser/CompoundIdentifierConverter.java | 57 ++++-- .../DrillParserWithCompoundIdConverter.java | 2 + .../org/apache/drill/TestExampleQueries.java | 23 +-- .../TestSelectWithFullNameIdentifiers.java | 180 ++++++++++++++++++ 4 files changed, 228 insertions(+), 34 deletions(-) create mode 100644 exec/java-exec/src/test/java/org/apache/drill/TestSelectWithFullNameIdentifiers.java diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/parser/CompoundIdentifierConverter.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/parser/CompoundIdentifierConverter.java index eb9f3affca2..d59319c6540 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/parser/CompoundIdentifierConverter.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/parser/CompoundIdentifierConverter.java @@ -22,6 +22,7 @@ import java.util.Set; import com.google.common.collect.Sets; +import org.apache.calcite.sql.SqlBasicCall; import org.apache.calcite.sql.SqlCall; import org.apache.calcite.sql.SqlIdentifier; import org.apache.calcite.sql.SqlJoin; @@ -119,18 +120,7 @@ public SqlNode visitChild( enableComplex = true; } } - if (expr.getKind() == SqlKind.SELECT) { - if (((SqlSelect) expr).getFrom() instanceof DrillCompoundIdentifier) { - fullSchemasSet.add((DrillCompoundIdentifier) ((SqlSelect) expr).getFrom()); - } else if (((SqlSelect) expr).getFrom() instanceof SqlJoin) { - if (((SqlJoin) ((SqlSelect) expr).getFrom()).getLeft() instanceof DrillCompoundIdentifier){ - fullSchemasSet.add((DrillCompoundIdentifier) ((SqlJoin) ((SqlSelect) expr).getFrom()).getLeft()); - } - if (((SqlJoin) ((SqlSelect) expr).getFrom()).getRight() instanceof DrillCompoundIdentifier){ - fullSchemasSet.add((DrillCompoundIdentifier) ((SqlJoin) ((SqlSelect) expr).getFrom()).getRight()); - } - } - } + setFullSchemasSet(expr); SqlNode newOperand = operand.accept(CompoundIdentifierConverter.this); enableComplex = localEnableComplex; if (newOperand != operand) { @@ -199,4 +189,47 @@ private static RewriteType[] R(RewriteType... types){ return types; } + /** + * This is the method which fills fullSchemasSet with full schema identifiers. + * @param expr {@link SqlNode} (SQL parse tree) of input query. + */ + private void setFullSchemasSet(SqlNode expr) { + if (expr.getKind() == SqlKind.SELECT) { + SqlSelect sqlSelect = (SqlSelect) expr; + for (SqlNode field: sqlSelect.getSelectList().getList()) { + setFullSchemasSet(field); + } + if (sqlSelect.getFrom() instanceof DrillCompoundIdentifier) { + fullSchemasSet.add((DrillCompoundIdentifier) sqlSelect.getFrom()); + } + if (sqlSelect.getFrom() instanceof SqlJoin) { + SqlJoin sqlJoin = (SqlJoin) sqlSelect.getFrom(); + if (((SqlJoin) sqlSelect.getFrom()).getLeft() instanceof DrillCompoundIdentifier) { + fullSchemasSet.add((DrillCompoundIdentifier) (sqlJoin.getLeft())); + } + if (((SqlJoin) sqlSelect.getFrom()).getRight() instanceof DrillCompoundIdentifier) { + fullSchemasSet.add((DrillCompoundIdentifier) (sqlJoin.getRight())); + } + } + if (sqlSelect.getWhere() instanceof SqlBasicCall) { + findSelectInOperandList((SqlBasicCall) sqlSelect.getWhere()); + } + } + } + + /** + * This is the method which finds SqlSelect in where operand list. + * If {@link SqlSelect} is found setFullSchemasSet() method is run. + * @param operandList Keeps the list of operands. + */ + private void findSelectInOperandList(SqlBasicCall operandList) { + for (SqlNode operand : operandList.getOperandList()) { + if (operand instanceof SqlSelect) { + setFullSchemasSet(operand); + } else if (operand instanceof SqlBasicCall) { + findSelectInOperandList((SqlBasicCall) operand); + } + } + } + } diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/parser/impl/DrillParserWithCompoundIdConverter.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/parser/impl/DrillParserWithCompoundIdConverter.java index 6048d847ab1..d66ffb1e418 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/parser/impl/DrillParserWithCompoundIdConverter.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/parser/impl/DrillParserWithCompoundIdConverter.java @@ -57,6 +57,8 @@ public SqlNode parseSqlExpressionEof() throws Exception { @Override public SqlNode parseSqlStmtEof() throws Exception { SqlNode originalSqlNode = super.parseSqlStmtEof(); + // TODO: When calcite will support recognizing map and array complex types + // need to return for using original SqlNode from calcite without converting return originalSqlNode.accept(createConverter()); } } diff --git a/exec/java-exec/src/test/java/org/apache/drill/TestExampleQueries.java b/exec/java-exec/src/test/java/org/apache/drill/TestExampleQueries.java index 6468186642c..97df2ee9082 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/TestExampleQueries.java +++ b/exec/java-exec/src/test/java/org/apache/drill/TestExampleQueries.java @@ -1195,25 +1195,4 @@ public void testDateImplicitCasting() throws Exception { .run(); } - @Test // DRILL-4682 - public void testColumnNamesWithSchemaName() throws Exception { - test("SELECT cp.`employee.json`.department_id FROM cp.`employee.json` ORDER BY cp.`employee.json`.department_id"); - test("SELECT COUNT(cp.`employee.json`.department_id) FROM cp.`employee.json`"); - test("SELECT cp.`employee.json`.employee_id, cp.`employee.json`.full_name, cp.`department.json`.department_id " + - "FROM cp.`employee.json` LEFT JOIN cp.`department.json` " + - "ON cp.`employee.json`.department_id = cp.`department.json`.department_id " + - "WHERE cp.`department.json`.department_description = 'HQ Marketing'"); - test("SELECT cp.`employee.json`.employee_id, t.department_id " + - "FROM cp.`employee.json` LEFT JOIN (select * from cp.`department.json`) t " + - "ON cp.`employee.json`.department_id = t.department_id " + - "WHERE t.department_description = 'HQ Marketing'"); - - // TODO: Can be uncommented after resolving "DRILL-3993: Rebase Drill on Calcite master branch" - // Fixed in "CALCITE-881: Allow schema.table.column references in GROUP BY". - // test("SELECT cp.`employee.json`.employee_id FROM cp.`employee.json` GROUP BY cp.`employee.json`.employee_id"); - - // TODO: Can be uncommented after resolving "CALCITE-1323: Wrong prefix number in DelegatingScope.fullyQualify()" - // test("SELECT cp.`employee.json`.* FROM cp.`employee.json`"); - } - -} +} \ No newline at end of file diff --git a/exec/java-exec/src/test/java/org/apache/drill/TestSelectWithFullNameIdentifiers.java b/exec/java-exec/src/test/java/org/apache/drill/TestSelectWithFullNameIdentifiers.java new file mode 100644 index 00000000000..325ab258160 --- /dev/null +++ b/exec/java-exec/src/test/java/org/apache/drill/TestSelectWithFullNameIdentifiers.java @@ -0,0 +1,180 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill; + +import org.apache.drill.exec.rpc.user.QueryDataBatch; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Ignore; +import org.junit.Test; + +import java.util.List; + +public class TestSelectWithFullNameIdentifiers extends BaseTestQuery { + static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(TestSelectWithFullNameIdentifiers.class); + + // create table in dfs_test + @BeforeClass + public static void createTableForTest() throws Exception { + test("USE dfs_test"); + test("CREATE TABLE tmp.`department` (department_id, department_description) as select department_id, department_description from cp.`department.json`"); + } + + // delete table from dfs_test + @AfterClass + public static void dropCreatedTable() throws Exception { + test("DROP TABLE IF EXISTS dfs_test.tmp.`department`"); + } + + @Test + public void testSimpleQuery() throws Exception { + // Query with using scalar function, WHERE statement, ORDER BY statement + testBuilder() + .sqlQuery("SELECT cp.`employee.json`.employee_id, UPPER(cp.`employee.json`.full_name) full_name " + + "FROM cp.`employee.json` " + + "WHERE cp.`employee.json`.department_id = 4 " + + "ORDER BY cp.`employee.json`.full_name") + .unOrdered() + .baselineColumns("employee_id", "full_name") + .baselineValues(43L, "JUANITA SHARP") + .baselineValues(44L, "SANDRA BRUNNER") + .build() + .run(); + } + + @Test + @Ignore + public void testQueryWithStar() throws Exception { + // Query with using full schema name for star identifier + // TODO: Can be used after resolving "CALCITE-1323: Wrong prefix number in DelegatingScope.fullyQualify()" + testBuilder() + .sqlQuery("SELECT cp.`department.json`.* FROM cp.`department.json` LIMIT 1") + .unOrdered() + .baselineColumns("department_id", "department_description") + .baselineValues(1L, "HQ General Management") + .build() + .run(); + } + + @Test + @Ignore + public void testQueryWithAggregation() throws Exception { + // Query with aggregation + // TODO: Can be used after resolving "DRILL-3993: Rebase Drill on Calcite master branch" + // Fixed in "CALCITE-881: Allow schema.table.column references in GROUP BY". + testBuilder() + .sqlQuery("SELECT cp.`employee.json`.position_title, " + + "COUNT(cp.`employee.json`.employee_id) employee_number " + + "FROM cp.`employee.json` " + + "WHERE cp.`employee.json`.position_title = 'Store Permanent Stocker' " + + "GROUP BY cp.`employee.json`.position_title") + .unOrdered() + .baselineColumns("position_title", "employee_number") + .baselineValues("Store Permanent Stocker", 222L) + .build() + .run(); + + } + + @Test + public void testLeftJoin() throws Exception { + // Query with left join (with different schema-qualified tables) + testBuilder() + .sqlQuery("SELECT cp.`employee.json`.employee_id, cp.`employee.json`.full_name, dfs_test.tmp.`department`.department_id " + + "FROM cp.`employee.json` LEFT JOIN dfs_test.tmp.`department` " + + "ON cp.`employee.json`.department_id = dfs_test.tmp.`department`.department_id " + + "WHERE dfs_test.tmp.`department`.department_description = 'HQ Marketing' " + + "ORDER BY cp.`employee.json`.full_name") + .unOrdered() + .baselineColumns("employee_id", "full_name", "department_id") + .baselineValues(36L, "Donna Arnold", 3L) + .baselineValues(42L, "Doris Carter", 3L) + .baselineValues(41L, "Howard Bechard", 3L) + .baselineValues(7L, "Rebecca Kanagaki", 3L) + .build() + .run(); + } + + @Test + @Ignore + public void testNestedQueryInWhereStatement() throws Exception { + // Query with sub-query in where statement (with different schema-qualified tables) + // TODO: Calcite issue: AssertionError: must call validate first + testBuilder() + .sqlQuery("SELECT cp.`employee.json`.employee_id, cp.`employee.json`.department_id " + + "FROM cp.`employee.json` " + + "WHERE cp.`employee.json`.employee_id < 5 " + + "AND cp.`employee.json`.department_id IN " + + "(SELECT dfs_test.tmp.`department`.department_id " + + "FROM dfs_test.tmp.`department` " + + "WHERE dfs_test.tmp.`department`.department_id > 0)") + .unOrdered() + .baselineColumns("employee_id", "department_id") + .baselineValues(1L, 1L) + .baselineValues(2L, 1L) + .baselineValues(4L, 1L) + .build() + .run(); + } + + @Test + @Ignore + public void testCorrelatedSubQuery() throws Exception { + // Correlated sub-query (nested query references the enclosing query with another schema-qualified table) + // TODO: Calcite issue: AssertionError + testBuilder() + .sqlQuery("SELECT dfs_test.tmp.`department`.department_id, " + + "(SELECT COUNT(cp.`employee.json`.employee_id) FROM cp.`employee.json` " + + "WHERE cp.`employee.json`.department_id = dfs_test.tmp.`department`.department_id) count_employee " + + "FROM dfs_test.tmp.`department` " + + "WHERE dfs_test.tmp.`department`.department_id = 1") + .unOrdered() + .baselineColumns("department_id", "count_employee") + .baselineValues(1L, 7L) + .build() + .run(); + } + + @Test + public void testNestedQueryInFromStatement() throws Exception { + testBuilder() + .sqlQuery("SELECT * FROM (SELECT cp.`employee.json`.employee_id, cp.`employee.json`.full_name " + + "FROM cp.`employee.json`) t " + + "WHERE t.employee_id = 1") + .unOrdered() + .baselineColumns("employee_id", "full_name") + .baselineValues(1L, "Sheri Nowmer") + .build() + .run(); + } + + @Test + public void testNestedQueryInLeftJoinStatement() throws Exception { + // Query with sub-query in left join statement (with different schema-qualified tables) + testBuilder() + .sqlQuery("SELECT cp.`employee.json`.employee_id, cp.`employee.json`.full_name, t.store_id " + + "FROM cp.`employee.json` LEFT JOIN (select cp.`store.json`.store_id from cp.`store.json`) t " + + "ON cp.`employee.json`.store_id = t.store_id " + + "WHERE cp.`employee.json`.employee_id = 1") + .unOrdered() + .baselineColumns("employee_id", "full_name", "store_id") + .baselineValues(1L, "Sheri Nowmer", 0L) + .build() + .run(); + } +}