From 4886312282dbff392a84f5f6f3c7569e52f5f2e5 Mon Sep 17 00:00:00 2001 From: HyoungJun Kim Date: Tue, 7 Oct 2014 21:56:28 +0900 Subject: [PATCH] TAJO-1102: Self-join with a partitioned table returns wrong result data. --- .../logical/PartitionedTableScanNode.java | 4 ++ .../tajo/engine/planner/logical/ScanNode.java | 30 +++++---- .../tajo/engine/query/TestJoinBroadcast.java | 61 +++++++++++++++++++ ...stMultiColumnPartitionTableInFilter.result | 0 4 files changed, 82 insertions(+), 13 deletions(-) create mode 100644 tajo-core/src/test/resources/results/TestJoinBroadcast/testBroadcastMultiColumnPartitionTableInFilter.result diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/logical/PartitionedTableScanNode.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/logical/PartitionedTableScanNode.java index 45cc578142..c33bdf518d 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/logical/PartitionedTableScanNode.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/logical/PartitionedTableScanNode.java @@ -41,6 +41,10 @@ public void init(ScanNode scanNode, Path[] inputPaths) { this.qual = scanNode.qual; this.targets = scanNode.targets; this.inputPaths = inputPaths; + + if (scanNode.hasAlias()) { + alias = scanNode.alias; + } } public void setInputPaths(Path [] paths) { diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/logical/ScanNode.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/logical/ScanNode.java index 8d28e6e83d..fc5cefca95 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/logical/ScanNode.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/logical/ScanNode.java @@ -180,22 +180,26 @@ public boolean equals(Object obj) { @Override public Object clone() throws CloneNotSupportedException { - ScanNode scanNode = (ScanNode) super.clone(); - - scanNode.tableDesc = (TableDesc) this.tableDesc.clone(); - - if (hasQual()) { - scanNode.qual = (EvalNode) 
this.qual.clone(); - } - - if (hasTargets()) { - scanNode.targets = new Target[targets.length]; + ScanNode scanNode = (ScanNode) super.clone(); + + scanNode.tableDesc = (TableDesc) this.tableDesc.clone(); + + if (hasQual()) { + scanNode.qual = (EvalNode) this.qual.clone(); + } + + if (hasTargets()) { + scanNode.targets = new Target[targets.length]; for (int i = 0; i < targets.length; i++) { scanNode.targets[i] = (Target) targets[i].clone(); } - } - - return scanNode; + } + + if (hasAlias()) { + scanNode.alias = alias; + } + + return scanNode; } @Override diff --git a/tajo-core/src/test/java/org/apache/tajo/engine/query/TestJoinBroadcast.java b/tajo-core/src/test/java/org/apache/tajo/engine/query/TestJoinBroadcast.java index 2625136dcc..0bfed8dbdd 100644 --- a/tajo-core/src/test/java/org/apache/tajo/engine/query/TestJoinBroadcast.java +++ b/tajo-core/src/test/java/org/apache/tajo/engine/query/TestJoinBroadcast.java @@ -646,5 +646,66 @@ public final void testSelfJoin() throws Exception { } + @Test + public final void testSelfJoin2() throws Exception { + /* + https://issues.apache.org/jira/browse/TAJO-1102 + See the following case. + CREATE TABLE orders_partition + (o_orderkey INT8, o_custkey INT8, o_totalprice FLOAT8, o_orderpriority TEXT, + o_clerk TEXT, o_shippriority INT4, o_comment TEXT) USING CSV WITH ('csvfile.delimiter'='|') + PARTITION BY COLUMN(o_orderdate TEXT, o_orderstatus TEXT); + + select a.o_orderstatus, count(*) as cnt + from orders_partition a + inner join orders_partition b + on a.o_orderdate = b.o_orderdate + and a.o_orderstatus = b.o_orderstatus + and a.o_orderkey = b.o_orderkey + where a.o_orderdate='1995-02-21' + and a.o_orderstatus in ('F') + group by a.o_orderstatus; + + Because of the where condition [where a.o_orderdate='1995-02-21' and a.o_orderstatus in ('F')], + the orders_partition table aliased as a is small and becomes the broadcast target. 
+ */ + String tableName = CatalogUtil.normalizeIdentifier("partitioned_orders_large"); + ResultSet res = executeString( + "create table " + tableName + " (o_orderkey INT8, o_custkey INT8, o_totalprice FLOAT8, o_orderpriority TEXT,\n" + + "o_clerk TEXT, o_shippriority INT4, o_comment TEXT) USING CSV WITH ('csvfile.delimiter'='|')\n" + + "PARTITION BY COLUMN(o_orderdate TEXT, o_orderstatus TEXT, o_orderkey_mod INT8)"); + res.close(); + assertTrue(catalog.existsTable(DEFAULT_DATABASE_NAME, tableName)); + + res = executeString( + "insert overwrite into " + tableName + + " select o_orderkey, o_custkey, o_totalprice, " + + " o_orderpriority, o_clerk, o_shippriority, o_comment, o_orderdate, o_orderstatus, o_orderkey % 10 " + + " from orders_large "); + res.close(); + + res = executeString( + "select a.o_orderdate, a.o_orderstatus, a.o_orderkey % 10 as o_orderkey_mod, a.o_totalprice " + + "from orders_large a " + + "join orders_large b on a.o_orderkey = b.o_orderkey " + + "where a.o_orderdate = '1993-10-14' and a.o_orderstatus = 'F' and a.o_orderkey % 10 = 1" + + " order by a.o_orderkey" + ); + String expected = resultSetToString(res); + res.close(); + + res = executeString( + "select a.o_orderdate, a.o_orderstatus, a.o_orderkey_mod, a.o_totalprice " + + "from " + tableName + + " a join "+ tableName + " b on a.o_orderkey = b.o_orderkey " + + "where a.o_orderdate = '1993-10-14' and a.o_orderstatus = 'F' and o_orderkey_mod = 1 " + + " order by a.o_orderkey" + ); + String resultSetData = resultSetToString(res); + res.close(); + + assertEquals(expected, resultSetData); + + } } diff --git a/tajo-core/src/test/resources/results/TestJoinBroadcast/testBroadcastMultiColumnPartitionTableInFilter.result b/tajo-core/src/test/resources/results/TestJoinBroadcast/testBroadcastMultiColumnPartitionTableInFilter.result new file mode 100644 index 0000000000..e69de29bb2