Skip to content

Commit

Permalink
DRILL-2598: Throw validation error if CREATE VIEW/CTAS contains dupli…
Browse files Browse the repository at this point in the history
…cate columns in definition
  • Loading branch information
vkorukanti committed May 5, 2015
1 parent 703314b commit ed02612
Show file tree
Hide file tree
Showing 3 changed files with 250 additions and 15 deletions.
Expand Up @@ -17,7 +17,9 @@
*/
package org.apache.drill.exec.planner.sql.handlers;

import com.google.common.collect.Sets;
import org.apache.calcite.schema.Table;
import org.apache.calcite.sql.TypedSqlNode;
import org.apache.calcite.tools.Planner;
import org.apache.calcite.tools.RelConversionException;
import org.apache.drill.common.exceptions.DrillException;
Expand All @@ -33,6 +35,7 @@
import org.apache.calcite.sql.SqlNode;
import org.apache.drill.exec.store.ischema.Records;

import java.util.HashSet;
import java.util.List;

public class SqlHandlerUtil {
Expand All @@ -55,12 +58,23 @@ public class SqlHandlerUtil {
public static RelNode resolveNewTableRel(boolean isNewTableView, Planner planner, List<String> tableFieldNames,
SqlNode newTableQueryDef) throws ValidationException, RelConversionException {

SqlNode validatedQuery = planner.validate(newTableQueryDef);
RelNode validatedQueryRelNode = planner.convert(validatedQuery);

if (tableFieldNames.size() > 0) {
final RelDataType queryRowType = validatedQueryRelNode.getRowType();
TypedSqlNode validatedSqlNodeWithType = planner.validateAndGetType(newTableQueryDef);

// Get the row type of view definition query.
// Reason for getting the row type from validated SqlNode than RelNode is because SqlNode -> RelNode involves
// renaming duplicate fields which is not desired when creating a view or table.
// For ex: SELECT region_id, region_id FROM cp.`region.json` LIMIT 1 returns
// +------------+------------+
// | region_id | region_id0 |
// +------------+------------+
// | 0 | 0 |
// +------------+------------+
// which is not desired when creating new views or tables.
final RelDataType queryRowType = validatedSqlNodeWithType.getType();
final RelNode validatedQueryRelNode = planner.convert(validatedSqlNodeWithType.getSqlNode());

if (tableFieldNames.size() > 0) {
// Field count should match.
if (tableFieldNames.size() != queryRowType.getFieldCount()) {
final String tblType = isNewTableView ? "view" : "table";
Expand All @@ -78,6 +92,9 @@ public static RelNode resolveNewTableRel(boolean isNewTableView, Planner planner
}
}

// validate the given field names to make sure there are no duplicates
ensureNoDuplicateColumnNames(tableFieldNames);

// CTAS statement has table field list (ex. below), add a project rel to rename the query fields.
// Ex. CREATE TABLE tblname(col1, medianOfCol2, avgOfCol3) AS
// SELECT col1, median(col2), avg(col3) FROM sourcetbl GROUP BY col1 ;
Expand All @@ -86,9 +103,22 @@ public static RelNode resolveNewTableRel(boolean isNewTableView, Planner planner
return DrillRelOptUtil.createRename(validatedQueryRelNode, tableFieldNames);
}

// As the column names of the view are derived from SELECT query, make sure the query has no duplicate column names
ensureNoDuplicateColumnNames(queryRowType.getFieldNames());

return validatedQueryRelNode;
}

private static void ensureNoDuplicateColumnNames(List<String> fieldNames) throws ValidationException {
final HashSet<String> fieldHashSet = Sets.newHashSetWithExpectedSize(fieldNames.size());
for(String field : fieldNames) {
if (fieldHashSet.contains(field.toLowerCase())) {
throw new ValidationException(String.format("Duplicate column name [%s]", field));
}
fieldHashSet.add(field.toLowerCase());
}
}

public static Table getTableFromSchema(AbstractSchema drillSchema, String tblName) throws DrillException {
try {
return drillSchema.getTable(tblName);
Expand Down
@@ -0,0 +1,95 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.drill.exec.sql;

import com.google.common.collect.ImmutableList;
import org.apache.drill.BaseTestQuery;
import org.junit.Test;

public class TestCTAS extends BaseTestQuery {
@Test // DRILL-2589
public void withDuplicateColumnsInDef1() throws Exception {
ctasErrorTestHelper("CREATE TABLE %s.%s AS SELECT region_id, region_id FROM cp.`region.json`",
String.format("Error: Duplicate column name [%s]", "region_id")
);
}

@Test // DRILL-2589
public void withDuplicateColumnsInDef2() throws Exception {
ctasErrorTestHelper("CREATE TABLE %s.%s AS SELECT region_id, sales_city, sales_city FROM cp.`region.json`",
String.format("Error: Duplicate column name [%s]", "sales_city")
);
}

@Test // DRILL-2589
public void withDuplicateColumnsInDef3() throws Exception {
ctasErrorTestHelper(
"CREATE TABLE %s.%s(regionid, regionid) " +
"AS SELECT region_id, sales_city FROM cp.`region.json`",
String.format("Error: Duplicate column name [%s]", "regionid")
);
}

@Test // DRILL-2589
public void withDuplicateColumnsInDef4() throws Exception {
ctasErrorTestHelper(
"CREATE TABLE %s.%s(regionid, salescity, salescity) " +
"AS SELECT region_id, sales_city, sales_city FROM cp.`region.json`",
String.format("Error: Duplicate column name [%s]", "salescity")
);
}

@Test // DRILL-2589
public void withDuplicateColumnsInDef5() throws Exception {
ctasErrorTestHelper(
"CREATE TABLE %s.%s(regionid, salescity, SalesCity) " +
"AS SELECT region_id, sales_city, sales_city FROM cp.`region.json`",
String.format("Error: Duplicate column name [%s]", "SalesCity")
);
}

@Test // DRILL-2589
public void whenInEqualColumnCountInTableDefVsInTableQuery() throws Exception {
ctasErrorTestHelper(
"CREATE TABLE %s.%s(regionid, salescity) " +
"AS SELECT region_id, sales_city, sales_region FROM cp.`region.json`",
"Error: table's field list and the table's query field list have different counts."
);
}

@Test // DRILL-2589
public void whenTableQueryColumnHasStarAndTableFiledListIsSpecified() throws Exception {
ctasErrorTestHelper(
"CREATE TABLE %s.%s(regionid, salescity) " +
"AS SELECT region_id, * FROM cp.`region.json`",
"Error: table's query field list has a '*', which is invalid when table's field list is specified."
);
}

private static void ctasErrorTestHelper(final String ctasSql, final String errorMsg)
throws Exception {
final String createTableSql = String.format(ctasSql, "dfs_test.tmp", "testTableName");

testBuilder()
.sqlQuery(createTableSql)
.unOrdered()
.baselineColumns("ok", "summary")
.baselineValues(false, errorMsg)
.go();
}
}
Expand Up @@ -193,10 +193,10 @@ public void viewWithSelectFieldsInDef_SelectFieldsInView_SelectFieldsInQuery2()
"(regionid, salescity)",
"SELECT region_id, sales_city FROM cp.`region.json` ORDER BY `region_id` DESC",
"SELECT regionid FROM TEST_SCHEMA.TEST_VIEW_NAME LIMIT 2",
new String[] { "regionid" },
new String[]{"regionid"},
ImmutableList.of(
new Object[] { 109L },
new Object[] { 108L }
new Object[]{109L},
new Object[]{108L}
)
);
}
Expand All @@ -209,10 +209,10 @@ public void viewWithUnionWithSelectFieldsInDef_StarInQuery() throws Exception{
null,
"SELECT region_id FROM cp.`region.json` UNION SELECT employee_id FROM cp.`employee.json`",
"SELECT regionid FROM TEST_SCHEMA.TEST_VIEW_NAME LIMIT 2",
new String[] { "regionid" },
new String[]{"regionid"},
ImmutableList.of(
new Object[] { 110L },
new Object[] { 108L }
new Object[]{110L},
new Object[]{108L}
)
);
}
Expand Down Expand Up @@ -254,9 +254,9 @@ public void viewWithCompoundIdentifiersInDef() throws Exception{
null,
viewDef,
"SELECT * FROM TEST_SCHEMA.TEST_VIEW_NAME LIMIT 1",
new String[] { "n_nationkey", "n_name", "n_regionkey", "n_comment" },
new String[]{"n_nationkey", "n_name", "n_regionkey", "n_comment"},
ImmutableList.of(
new Object[] { 0, "ALGERIA", 0, " haggle. carefully final deposits detect slyly agai" }
new Object[]{0, "ALGERIA", 0, " haggle. carefully final deposits detect slyly agai"}
)
);
}
Expand Down Expand Up @@ -296,8 +296,8 @@ public void createViewWhenViewAlreadyExists() throws Exception {

// Make sure the new view created returns the data expected.
queryViewHelper(String.format("SELECT * FROM %s.`%s` LIMIT 1", TEMP_SCHEMA, viewName),
new String[] { "sales_state_province" },
ImmutableList.of(new Object[] { "None" })
new String[]{"sales_state_province" },
ImmutableList.of(new Object[]{"None"})
);
} finally {
dropViewHelper(TEMP_SCHEMA, viewName, TEMP_SCHEMA);
Expand Down Expand Up @@ -434,7 +434,7 @@ public void viewResolvingTablesInWorkspaceSchema() throws Exception {
createViewHelper(TEMP_SCHEMA, viewName, TEMP_SCHEMA, null, "SELECT region_id, sales_city FROM `region.json`");

final String[] baselineColumns = new String[] { "region_id", "sales_city" };
final List<Object[]> baselineValues = ImmutableList.of(new Object[] { 109L, "Santa Fe"});
final List<Object[]> baselineValues = ImmutableList.of(new Object[]{109L, "Santa Fe"});

// Query the view
queryViewHelper(
Expand Down Expand Up @@ -480,4 +480,114 @@ public void viewSchemaWhenSelectFieldsInDef_SelectFieldsInView() throws Exceptio
dropViewHelper(TEMP_SCHEMA, viewName, TEMP_SCHEMA);
}
}

@Test // DRILL-2589
public void createViewWithDuplicateColumnsInDef1() throws Exception {
createViewErrorTestHelper(
"CREATE VIEW %s.%s AS SELECT region_id, region_id FROM cp.`region.json`",
String.format("Error: Duplicate column name [%s]", "region_id")
);
}

@Test // DRILL-2589
public void createViewWithDuplicateColumnsInDef2() throws Exception {
createViewErrorTestHelper("CREATE VIEW %s.%s AS SELECT region_id, sales_city, sales_city FROM cp.`region.json`",
String.format("Error: Duplicate column name [%s]", "sales_city")
);
}

@Test // DRILL-2589
public void createViewWithDuplicateColumnsInDef3() throws Exception {
createViewErrorTestHelper(
"CREATE VIEW %s.%s(regionid, regionid) AS SELECT region_id, sales_city FROM cp.`region.json`",
String.format("Error: Duplicate column name [%s]", "regionid")
);
}

@Test // DRILL-2589
public void createViewWithDuplicateColumnsInDef4() throws Exception {
createViewErrorTestHelper(
"CREATE VIEW %s.%s(regionid, salescity, salescity) " +
"AS SELECT region_id, sales_city, sales_city FROM cp.`region.json`",
String.format("Error: Duplicate column name [%s]", "salescity")
);
}

@Test // DRILL-2589
public void createViewWithDuplicateColumnsInDef5() throws Exception {
createViewErrorTestHelper(
"CREATE VIEW %s.%s(regionid, salescity, SalesCity) " +
"AS SELECT region_id, sales_city, sales_city FROM cp.`region.json`",
String.format("Error: Duplicate column name [%s]", "SalesCity")
);
}

@Test // DRILL-2589
public void createViewWithDuplicateColumnsInDef6() throws Exception {
createViewErrorTestHelper(
"CREATE VIEW %s.%s " +
"AS SELECT t1.region_id, t2.region_id FROM cp.`region.json` t1 JOIN cp.`region.json` t2 " +
"ON t1.region_id = t2.region_id LIMIT 1",
String.format("Error: Duplicate column name [%s]", "region_id")
);
}

@Test // DRILL-2589
public void createViewWithUniqueColsInFieldListDuplicateColsInQuery1() throws Exception {
testViewHelper(
TEMP_SCHEMA,
"(regionid1, regionid2)",
"SELECT region_id, region_id FROM cp.`region.json` LIMIT 1",
"SELECT * FROM TEST_SCHEMA.TEST_VIEW_NAME",
new String[]{"regionid1", "regionid2"},
ImmutableList.of(
new Object[]{0L, 0L}
)
);
}

@Test // DRILL-2589
public void createViewWithUniqueColsInFieldListDuplicateColsInQuery2() throws Exception {
testViewHelper(
TEMP_SCHEMA,
"(regionid1, regionid2)",
"SELECT t1.region_id, t2.region_id FROM cp.`region.json` t1 JOIN cp.`region.json` t2 " +
"ON t1.region_id = t2.region_id LIMIT 1",
"SELECT * FROM TEST_SCHEMA.TEST_VIEW_NAME",
new String[]{"regionid1", "regionid2"},
ImmutableList.of(
new Object[]{0L, 0L}
)
);
}

@Test // DRILL-2589
public void createViewWhenInEqualColumnCountInViewDefVsInViewQuery() throws Exception {
createViewErrorTestHelper(
"CREATE VIEW %s.%s(regionid, salescity) " +
"AS SELECT region_id, sales_city, sales_region FROM cp.`region.json`",
"Error: view's field list and the view's query field list have different counts."
);
}

@Test // DRILL-2589
public void createViewWhenViewQueryColumnHasStarAndViewFiledListIsSpecified() throws Exception {
createViewErrorTestHelper(
"CREATE VIEW %s.%s(regionid, salescity) " +
"AS SELECT region_id, * FROM cp.`region.json`",
"Error: view's query field list has a '*', which is invalid when view's field list is specified."
);
}

private static void createViewErrorTestHelper(final String viewSql, final String errorMsg)
throws Exception {
final String createViewSql = String.format(viewSql, TEMP_SCHEMA, "duplicateColumnsInViewDef");

testBuilder()
.sqlQuery(createViewSql)
.unOrdered()
.baselineColumns("ok", "summary")
.baselineValues(false, errorMsg)
.go();
}
}

0 comments on commit ed02612

Please sign in to comment.