Skip to content

Commit

Permalink
[SPARK-16374][SQL] Remove Alias from MetastoreRelation and SimpleCatalogRelation
Browse files Browse the repository at this point in the history

#### What changes were proposed in this pull request?
Different from the other leaf nodes, `MetastoreRelation` and `SimpleCatalogRelation` have a pre-defined `alias`, which is used to change the qualifier of the node. However, based on the existing alias handling, alias should be put in `SubqueryAlias`.

This PR is to separate alias handling from `MetastoreRelation` and `SimpleCatalogRelation` to make it consistent with the other nodes. It simplifies the signature and conversion to a `BaseRelation`.

For example, below is an example query against a `MetastoreRelation`, which is converted to a `LogicalRelation`:
```SQL
SELECT tmp.a + 1 FROM test_parquet_ctas tmp WHERE tmp.a > 2
```

Before changes, the analyzed plan is
```
== Analyzed Logical Plan ==
(a + 1): int
Project [(a#951 + 1) AS (a + 1)apache#952]
+- Filter (a#951 > 2)
   +- SubqueryAlias tmp
      +- Relation[a#951] parquet
```
After changes, the analyzed plan becomes
```
== Analyzed Logical Plan ==
(a + 1): int
Project [(a#951 + 1) AS (a + 1)apache#952]
+- Filter (a#951 > 2)
   +- SubqueryAlias tmp
      +- SubqueryAlias test_parquet_ctas
         +- Relation[a#951] parquet
```

**Note: the optimized plans are the same.**

For `SimpleCatalogRelation`, the existing code already always generates two `SubqueryAlias` nodes (one for the user-specified alias and one for the table name). Thus, no change is needed.

#### How was this patch tested?
Added test cases.

Author: gatorsmile <gatorsmile@gmail.com>

Closes apache#14053 from gatorsmile/removeAliasFromMetastoreRelation.
  • Loading branch information
gatorsmile authored and cloud-fan committed Jul 7, 2016
1 parent 34283de commit 42279bf
Show file tree
Hide file tree
Showing 5 changed files with 14 additions and 15 deletions.
Expand Up @@ -403,7 +403,7 @@ class SessionCatalog(
val relation =
if (name.database.isDefined || !tempTables.contains(table)) {
val metadata = externalCatalog.getTable(db, table)
SimpleCatalogRelation(db, metadata, alias)
SimpleCatalogRelation(db, metadata)
} else {
tempTables(table)
}
Expand Down
Expand Up @@ -244,8 +244,7 @@ trait CatalogRelation {
*/
case class SimpleCatalogRelation(
databaseName: String,
metadata: CatalogTable,
alias: Option[String] = None)
metadata: CatalogTable)
extends LeafNode with CatalogRelation {

override def catalogTable: CatalogTable = metadata
Expand All @@ -261,7 +260,7 @@ case class SimpleCatalogRelation(
CatalystSqlParser.parseDataType(f.dataType),
// Since data can be dumped in randomly with no validation, everything is nullable.
nullable = true
)(qualifier = Some(alias.getOrElse(metadata.identifier.table)))
)(qualifier = Some(metadata.identifier.table))
}
}

Expand Down
Expand Up @@ -407,7 +407,7 @@ class SessionCatalogSuite extends SparkFunSuite {
val relationWithAlias =
SubqueryAlias(alias,
SubqueryAlias("tbl1",
SimpleCatalogRelation("db2", tableMetadata, Some(alias))))
SimpleCatalogRelation("db2", tableMetadata)))
assert(catalog.lookupRelation(
TableIdentifier("tbl1", Some("db2")), alias = None) == relation)
assert(catalog.lookupRelation(
Expand Down
Expand Up @@ -180,8 +180,10 @@ private[hive] class HiveMetastoreCatalog(sparkSession: SparkSession) extends Log
SubqueryAlias(aliasText, sessionState.sqlParser.parsePlan(viewText))
}
} else {
MetastoreRelation(
qualifiedTableName.database, qualifiedTableName.name, alias)(table, client, sparkSession)
val qualifiedTable =
MetastoreRelation(
qualifiedTableName.database, qualifiedTableName.name)(table, client, sparkSession)
alias.map(a => SubqueryAlias(a, qualifiedTable)).getOrElse(qualifiedTable)
}
}

Expand Down Expand Up @@ -385,7 +387,7 @@ private[hive] class HiveMetastoreCatalog(sparkSession: SparkSession) extends Log
// Read path
case relation: MetastoreRelation if shouldConvertMetastoreParquet(relation) =>
val parquetRelation = convertToParquetRelation(relation)
SubqueryAlias(relation.alias.getOrElse(relation.tableName), parquetRelation)
SubqueryAlias(relation.tableName, parquetRelation)
}
}
}
Expand Down Expand Up @@ -423,7 +425,7 @@ private[hive] class HiveMetastoreCatalog(sparkSession: SparkSession) extends Log
// Read path
case relation: MetastoreRelation if shouldConvertMetastoreOrc(relation) =>
val orcRelation = convertToOrcRelation(relation)
SubqueryAlias(relation.alias.getOrElse(relation.tableName), orcRelation)
SubqueryAlias(relation.tableName, orcRelation)
}
}
}
Expand Down
Expand Up @@ -41,8 +41,7 @@ import org.apache.spark.sql.hive.client.HiveClient

private[hive] case class MetastoreRelation(
databaseName: String,
tableName: String,
alias: Option[String])
tableName: String)
(val catalogTable: CatalogTable,
@transient private val client: HiveClient,
@transient private val sparkSession: SparkSession)
Expand All @@ -52,13 +51,12 @@ private[hive] case class MetastoreRelation(
case relation: MetastoreRelation =>
databaseName == relation.databaseName &&
tableName == relation.tableName &&
alias == relation.alias &&
output == relation.output
case _ => false
}

override def hashCode(): Int = {
Objects.hashCode(databaseName, tableName, alias, output)
Objects.hashCode(databaseName, tableName, output)
}

override protected def otherCopyArgs: Seq[AnyRef] = catalogTable :: sparkSession :: Nil
Expand Down Expand Up @@ -208,7 +206,7 @@ private[hive] case class MetastoreRelation(
CatalystSqlParser.parseDataType(f.dataType),
// Since data can be dumped in randomly with no validation, everything is nullable.
nullable = true
)(qualifier = Some(alias.getOrElse(tableName)))
)(qualifier = Some(tableName))
}

/** PartitionKey attributes */
Expand Down Expand Up @@ -243,6 +241,6 @@ private[hive] case class MetastoreRelation(
}

override def newInstance(): MetastoreRelation = {
MetastoreRelation(databaseName, tableName, alias)(catalogTable, client, sparkSession)
MetastoreRelation(databaseName, tableName)(catalogTable, client, sparkSession)
}
}

0 comments on commit 42279bf

Please sign in to comment.