Permalink
Browse files

IMPALA-898: Support explicit column names in WITH-clause views.

Example:
WITH t(c1, c2) AS (SELECT int_col, bool_col FROM functional.alltypes)
SELECT * FROM t

This will create a local view with the 'int_col' and 'bool_col' columns labeled as 'c1'
and 'c2'. If the number of labels is less than the number of columns, then the remaining
columns in the local view will be labeled as the corresponding columns in the query
statement. Therefore, this is also a valid query (only 'int_col' will be labeled as
'c1'):

WITH t(c1) AS (SELECT int_col, bool_col FROM functional.alltypes)
SELECT * FROM t

Change-Id: Ie3a559ca9eaf95c6980c5695a49f02010c42899b
Reviewed-on: http://gerrit.cloudera.org:8080/717
Reviewed-by: Vlad Berindei <vlad.berindei@cloudera.com>
Tested-by: Internal Jenkins
  • Loading branch information...
vladberindei authored and Internal Jenkins committed Aug 29, 2015
1 parent 11baac3 commit aa6208f90533f6019084f3bc5f3c9cf0e5346b1d
@@ -1488,9 +1488,14 @@ opt_with_clause ::=
with_view_def ::=
IDENT:alias KW_AS LPAREN query_stmt:query RPAREN
- {: RESULT = new View(alias, query); :}
+ {: RESULT = new View(alias, query, null); :}
| STRING_LITERAL:alias KW_AS LPAREN query_stmt:query RPAREN
- {: RESULT = new View(alias, query); :}
+ {: RESULT = new View(alias, query, null); :}
+ | IDENT:alias LPAREN ident_list:col_names RPAREN KW_AS LPAREN query_stmt:query RPAREN
+ {: RESULT = new View(alias, query, col_names); :}
+ | STRING_LITERAL:alias LPAREN ident_list:col_names RPAREN
+ KW_AS LPAREN query_stmt:query RPAREN
+ {: RESULT = new View(alias, query, col_names); :}
;
with_view_def_list ::=
@@ -416,10 +416,21 @@ public Analyzer findAnalyzer(TupleId tid) {
/**
* Registers a local view definition with this analyzer. Throws an exception if a view
- * definition with the same alias has already been registered.
+ * definition with the same alias has already been registered or if the number of
+ * explicit column labels is greater than the number of columns in the view statement.
*/
public void registerLocalView(View view) throws AnalysisException {
Preconditions.checkState(view.isLocalView());
+ if (view.hasColLabels()) {
+ List<String> viewLabels = view.getColLabels();
+ List<String> queryStmtLabels = view.getQueryStmt().getColLabels();
+ if (viewLabels.size() > queryStmtLabels.size()) {
+ throw new AnalysisException("WITH-clause view '" + view.getName() +
+ "' returns " + queryStmtLabels.size() + " columns, but " +
+ viewLabels.size() + " labels were specified. The number of column " +
+ "labels must be smaller or equal to the number of returned columns.");
+ }
+ }
if (localViews_.put(view.getName().toLowerCase(), view) != null) {
throw new AnalysisException(
String.format("Duplicate table alias: '%s'", view.getName()));
@@ -97,6 +97,7 @@ public InlineViewRef(View view, TableRef origTblRef) {
smap_ = new ExprSubstitutionMap();
baseTblSmap_ = new ExprSubstitutionMap();
setJoinAttrs(origTblRef);
+ explicitColLabels_ = view.getColLabels();
// Set implicit aliases if no explicit one was given.
if (hasExplicitAlias()) return;
aliases_ = new String[] {
@@ -93,6 +93,14 @@ public static String getIdentSql(String ident) {
return ident;
}
+ public static List<String> getIdentSqlList(List<String> identList) {
+ List<String> identSqlList = Lists.newArrayList();
+ for (String ident: identList) {
+ identSqlList.add(getIdentSql(ident));
+ }
+ return identSqlList;
+ }
+
public static String getPathSql(List<String> path) {
StringBuilder result = new StringBuilder();
for (String p: path) {
@@ -104,7 +104,8 @@ private WithClause(WithClause other) {
Preconditions.checkNotNull(other);
views_ = Lists.newArrayList();
for (View view: other.views_) {
- views_.add(new View(view.getName(), view.getQueryStmt().clone()));
+ views_.add(new View(view.getName(), view.getQueryStmt().clone(),
+ view.getOriginalColLabels()));
}
}
@@ -119,9 +120,13 @@ public void reset() {
public String toSql() {
List<String> viewStrings = Lists.newArrayList();
for (View view: views_) {
- // Enclose the view alias in quotes if Hive cannot parse it without quotes.
- // This is needed for view compatibility between Impala and Hive.
+ // Enclose the view alias and explicit labels in quotes if Hive cannot parse it
+ // without quotes. This is needed for view compatibility between Impala and Hive.
String aliasSql = ToSqlUtils.getIdentSql(view.getName());
+ if (view.hasColLabels()) {
+ aliasSql += "(" + Joiner.on(", ").join(
+ ToSqlUtils.getIdentSqlList(view.getOriginalColLabels())) + ")";
+ }
viewStrings.add(aliasSql + " AS (" + view.getQueryStmt().toSql() + ")");
}
return "WITH " + Joiner.on(",").join(viewStrings);
@@ -15,6 +15,7 @@
package com.cloudera.impala.catalog;
import java.io.StringReader;
+import java.util.ArrayList;
import java.util.List;
import java.util.Set;
@@ -29,6 +30,7 @@
import com.cloudera.impala.thrift.TTable;
import com.cloudera.impala.thrift.TTableDescriptor;
import com.cloudera.impala.thrift.TTableType;
+import com.google.common.collect.Lists;
/**
* Table metadata representing a catalog view or a local view from a WITH clause.
@@ -64,19 +66,24 @@
// Set if this View is from a WITH clause and not persisted in the catalog.
private final boolean isLocalView_;
+ // Set if this View is from a WITH clause with column labels.
+ private List<String> colLabels_;
+
public View(TableId id, org.apache.hadoop.hive.metastore.api.Table msTable,
Db db, String name, String owner) {
super(id, msTable, db, name, owner);
isLocalView_ = false;
}
/**
- * C'tor for WITH-clause views that already have a parsed QueryStmt.
+ * C'tor for WITH-clause views that already have a parsed QueryStmt and an optional
+ * list of column labels.
*/
- public View(String alias, QueryStmt queryStmt) {
+ public View(String alias, QueryStmt queryStmt, List<String> colLabels) {
super(null, null, null, alias, null);
isLocalView_ = true;
queryStmt_ = queryStmt;
+ colLabels_ = colLabels;
}
@Override
@@ -147,6 +154,27 @@ private void init() throws TableLoadingException {
public String getInlineViewDef() { return inlineViewDef_; }
public boolean isLocalView() { return isLocalView_; }
+ /**
+ * Returns the column labels the user specified in the WITH-clause.
+ */
+ public List<String> getOriginalColLabels() { return colLabels_; }
+
+ /**
+ * Returns the explicit column labels for this view, or null if they need to be derived
+ * entirely from the underlying query statement. The returned list has at least as many
+ * elements as the number of column labels in the query stmt.
+ */
+ public List<String> getColLabels() {
+ if (colLabels_ == null) return null;
+ if (colLabels_.size() >= queryStmt_.getColLabels().size()) return colLabels_;
+ List<String> explicitColLabels = Lists.newArrayList(colLabels_);
+ explicitColLabels.addAll(queryStmt_.getColLabels().subList(
+ colLabels_.size(), queryStmt_.getColLabels().size()));
+ return explicitColLabels;
+ }
+
+ public boolean hasColLabels() { return colLabels_ != null; }
+
@Override
public TTableDescriptor toThriftDescriptor(Set<Long> referencedPartitions) {
throw new IllegalStateException("Cannot call toThriftDescriptor() on a view.");
@@ -2345,12 +2345,28 @@ public void TestWithClause() throws AnalysisException {
// Single view in WITH clause.
AnalyzesOk("with t as (select int_col x, bigint_col y from functional.alltypes) " +
"select x, y from t");
+ // Single view in WITH clause with column labels.
+ AnalyzesOk("with t(c1, c2) as (select int_col x, bigint_col y " +
+ "from functional.alltypes) " +
+ "select c1, c2 from t");
+ // Single view in WITH clause with the number of column labels less than the number
+ // of columns.
+ AnalyzesOk("with t(c1) as (select int_col, bigint_col y " +
+ "from functional.alltypes) " +
+ "select c1, y from t");
// Multiple views in WITH clause. Only one view is used.
AnalyzesOk("with t1 as (select int_col x, bigint_col y from functional.alltypes), " +
- "t2 as (select 1 x , 10 y), t3 as (values(2 x , 20 y), (3, 30)), " +
+ "t2 as (select 1 x, 10 y), t3 as (values(2 x, 20 y), (3, 30)), " +
"t4 as (select 4 x, 40 y union all select 5, 50), " +
"t5 as (select * from (values(6 x, 60 y)) as a) " +
"select x, y from t3");
+ // Multiple views in WITH clause with column labels. Only one view is used.
+ AnalyzesOk("with t1(c1, c2) as (select int_col, bigint_col " +
+ "from functional.alltypes), " +
+ "t2(c1, c2) as (select 1, 10), t3(a, b) as (values(2, 5), (3, 30)), " +
+ "t4(c1, c2) as (select 4, 40 union all select 5, 50), " +
+ "t5 as (select * from (values(6, 60)) as a) " +
+ "select a, b from t3");
// Multiple views in WITH clause. All views used in a union.
AnalyzesOk("with t1 as (select int_col x, bigint_col y from functional.alltypes), " +
"t2 as (select 1 x , 10 y), t3 as (values(2 x , 20 y), (3, 30)), " +
@@ -2365,9 +2381,19 @@ public void TestWithClause() throws AnalysisException {
"t5 as (select * from (values(6 x, 60 y)) as a) " +
"select t1.y, t2.y, t3.y, t4.y, t5.y from t1, t2, t3, t4, t5 " +
"where t1.y = t2.y and t2.y = t3.y and t3.y = t4.y and t4.y = t5.y");
+ // Multiple views in WITH clause with column labels. All views used in a join.
+ AnalyzesOk("with t1(c1, c2) as (select int_col x, bigint_col y " +
+ "from functional.alltypes), " +
+ "t2(c1, c2) as (select 1 x , 10 y), t3 as (values(2 x , 20 y), (3, 30)), " +
+ "t4 as (select 4 x, 40 y union all select 5, 50), " +
+ "t5 as (select * from (values(6 x, 60 y)) as a) " +
+ "select t1.c2, t2.c2, t3.y, t4.y, t5.y from t1, t2, t3, t4, t5 " +
+ "where t1.c2 = t2.c2 and t2.c2 = t3.y and t3.y = t4.y and t4.y = t5.y");
// WITH clause in insert statement.
AnalyzesOk("with t1 as (select * from functional.alltypestiny)" +
"insert into functional.alltypes partition(year, month) select * from t1");
+ AnalyzesOk("with t1(c1, c2) as (select * from functional.alltypestiny)" +
+ "insert into functional.alltypes partition(year, month) select * from t1");
// WITH clause in insert statement with a select statement that has a WITH
// clause and an inline view (IMPALA-1100)
AnalyzesOk("with test_ctas_1 as (select * from functional.alltypestiny) insert " +
@@ -2385,6 +2411,8 @@ public void TestWithClause() throws AnalysisException {
AnalyzesOk("with t1 as (select id from functional.alltypestiny) " +
"insert into functional.alltypes partition(year, month) " +
"with t1 as (select * from functional.alltypessmall) select * from t1");
+ AnalyzesOk("with t(c1, c2) as (select * from functional.alltypes) " +
+ "select a.c1, a.c2 from t a");
// WITH-clause view used in inline view.
AnalyzesOk("with t1 as (select 'a') select * from (select * from t1) as t2");
AnalyzesOk("with t1 as (select 'a') " +
@@ -2413,6 +2441,9 @@ public void TestWithClause() throws AnalysisException {
// Aliases are resolved from inner-most to the outer-most scope.
AnalyzesOk("with t1 as (select 'a') " +
"select t2.* from (with t1 as (select 'b') select * from t1) as t2");
+ // Column labels do not conflict because they are in different scopes.
+ AnalyzesOk("with t1(c1) as (select 'a') " +
+ "select c1 from (with t1(c1) as (select 'b') select c1 from t1) as t2");
// Table aliases do not conflict because t1 from the inline view is never used.
AnalyzesOk("with t1 as (select 1), t2 as (select 2)" +
"select * from functional.alltypes as t1");
@@ -2421,6 +2452,9 @@ public void TestWithClause() throws AnalysisException {
// Fully-qualified table does not conflict with WITH-clause table.
AnalyzesOk("with alltypes as (select * from functional.alltypes) " +
"select * from functional.alltypes union all select * from alltypes");
+ // Column labels can be used with table aliases.
+ AnalyzesOk("with t(c1) as (select id from functional.alltypes) " +
+ "select a.c1 from t a");
// Use a custom analyzer to change the default db to functional.
// Recursion is prevented because 'alltypes' in t1 refers to the table
@@ -2491,6 +2525,16 @@ public void TestWithClause() throws AnalysisException {
"Duplicate table alias: 't1'");
AnalysisError("with t1 as (select 1) select * from (select 2) as t1 inner join t1",
"Duplicate table alias: 't1'");
+ // With clause column labels must be used intead of aliases.
+ AnalysisError("with t1(c1) as (select id cnt from functional.alltypes) "+
+ "select cnt from t1",
+ "Could not resolve column/field reference: 'cnt'");
+ // With clause column labels must not exceed the number of columns in the query.
+ AnalysisError("with t(c1, c2) as (select id from functional.alltypes) " +
+ "select * from t",
+ "WITH-clause view 't' returns 1 columns, but 2 labels were specified. The " +
+ "number of column labels must be smaller or equal to the number of returned " +
+ "columns.");
// Multiple references in same select statement require aliases.
AnalysisError("with t1 as (select 'a' x) select * from t1 inner join t1",
"Duplicate table alias: 't1'");
@@ -750,11 +750,16 @@ public void TestValuesStmt() throws AnalysisException {
@Test
public void TestWithClause() throws AnalysisException {
ParsesOk("with t as (select 1 as a) select a from t");
+ ParsesOk("with t(x) as (select 1 as a) select x from t");
ParsesOk("with t as (select c from tab) select * from t");
+ ParsesOk("with t(x, y) as (select * from tab) select * from t");
ParsesOk("with t as (values(1, 2, 3), (4, 5, 6)) select * from t");
+ ParsesOk("with t(x, y, z) as (values(1, 2, 3), (4, 5, 6)) select * from t");
ParsesOk("with t1 as (select 1 as a), t2 as (select 2 as a) select a from t1");
ParsesOk("with t1 as (select c from tab), t2 as (select c from tab)" +
"select c from t2");
+ ParsesOk("with t1(x) as (select c from tab), t2(x) as (select c from tab)" +
+ "select x from t2");
// With clause and union statement.
ParsesOk("with t1 as (select 1 as a), t2 as (select 2 as a)" +
"select a from t1 union all select a from t2");
@@ -763,16 +768,19 @@ public void TestWithClause() throws AnalysisException {
"select a from t1 inner join t2 on t1.a = t2.a");
// With clause in inline view.
ParsesOk("select * from (with t as (select 1 as a) select * from t) as a");
+ ParsesOk("select * from (with t(x) as (select 1 as a) select * from t) as a");
// With clause in query statement of insert statement.
ParsesOk("insert into x with t as (select * from tab) select * from t");
+ ParsesOk("insert into x with t(x, y) as (select * from tab) select * from t");
ParsesOk("insert into x with t as (values(1, 2, 3)) select * from t");
+ ParsesOk("insert into x with t(x, y) as (values(1, 2, 3)) select * from t");
// With clause before insert statement.
ParsesOk("with t as (select 1) insert into x select * from t");
+ ParsesOk("with t(x) as (select 1) insert into x select * from t");
// Test quoted identifier or string literal as table alias.
ParsesOk("with `t1` as (select 1 a), 't2' as (select 2 a), \"t3\" as (select 3 a)" +
"select a from t1 union all select a from t2 union all select a from t3");
-
// Multiple with clauses. Operands must be in parenthesis to
// have their own with clause.
ParsesOk("with t as (select 1) " +
@@ -783,9 +791,14 @@ public void TestWithClause() throws AnalysisException {
"(with t as (select 3) select * from t) order by 1 limit 1");
// Multiple with clauses. One before the insert and one inside the query statement.
ParsesOk("with t as (select 1) insert into x with t as (select 2) select * from t");
+ ParsesOk("with t(c1) as (select 1) " +
+ "insert into x with t(c2) as (select 2) select * from t");
// Empty with clause.
ParserError("with t as () select 1");
+ ParserError("with t(x) as () select 1");
+ // No labels inside parenthesis.
+ ParserError("with t() as (select 1 as a) select a from t");
// Missing select, union or insert statement after with clause.
ParserError("select * from (with t as (select 1 as a)) as a");
ParserError("with t as (select 1)");
@@ -794,8 +807,11 @@ public void TestWithClause() throws AnalysisException {
ParserError("with t as select 1 as a union all select a from t");
ParserError("with t1 as (select 1 as a), t2 as select 2 as a select a from t");
ParserError("with t as select 1 as a select a from t");
+ // Missing parenthesis around column labels.
+ ParserError("with t c1 as (select 1 as a) select c1 from t");
// Insert in with clause is not valid.
ParserError("with t as (insert into x select * from tab) select * from t");
+ ParserError("with t(c1) as (insert into x select * from tab) select * from t");
// Union operands need to be parenthesized to have their own with clause.
ParserError("select * from t union all with t as (select 2) select * from t");
}
@@ -774,6 +774,14 @@ public void withClauseTest() {
// WITH clause in select stmt.
testToSql("with t as (select * from functional.alltypes) select * from t",
"WITH t AS (SELECT * FROM functional.alltypes) SELECT * FROM t");
+ testToSql("with t(c1) as (select * from functional.alltypes) select * from t",
+ "WITH t(c1) AS (SELECT * FROM functional.alltypes) SELECT * FROM t");
+ testToSql("with t(`table`, col, `create`) as (select * from functional.alltypes) " +
+ "select * from t",
+ "WITH t(`table`, col, `create`) AS (SELECT * FROM functional.alltypes) " +
+ "SELECT * FROM t");
+ testToSql("with t(c1, c2) as (select * from functional.alltypes) select * from t",
+ "WITH t(c1, c2) AS (SELECT * FROM functional.alltypes) SELECT * FROM t");
testToSql("with t as (select sum(int_col) over(partition by tinyint_col, " +
"bool_col order by float_col rows between unbounded preceding and " +
"current row) as x from functional.alltypes) " +
@@ -787,6 +795,10 @@ public void withClauseTest() {
"select * from t a inner join t b on (a.int_col = b.int_col)",
"WITH t AS (SELECT * FROM functional.alltypes) " +
"SELECT * FROM t a INNER JOIN t b ON (a.int_col = b.int_col)");
+ testToSql("with t(c1, c2) as (select * from functional.alltypes) " +
+ "select a.c1, a.c2 from t a inner join t b on (a.c1 = b.c2)",
+ "WITH t(c1, c2) AS (SELECT * FROM functional.alltypes) " +
+ "SELECT a.c1, a.c2 FROM t a INNER JOIN t b ON (a.c1 = b.c2)");
// WITH clause in select stmt with a join and a USING clause.
testToSql("with t as (select * from functional.alltypes) " +
"select * from t a inner join t b using(int_col)",
Oops, something went wrong.

0 comments on commit aa6208f

Please sign in to comment.