83 commits
01e4cdf  Merge remote-tracking branch 'upstream/master' (gatorsmile, Nov 13, 2015)
6835704  Merge remote-tracking branch 'upstream/master' (gatorsmile, Nov 14, 2015)
9180687  Merge remote-tracking branch 'upstream/master' (gatorsmile, Nov 14, 2015)
b38a21e  SPARK-11633 (gatorsmile, Nov 17, 2015)
d2b84af  Merge remote-tracking branch 'upstream/master' into joinMakeCopy (gatorsmile, Nov 17, 2015)
fda8025  Merge remote-tracking branch 'upstream/master' (gatorspark, Nov 17, 2015)
ac0dccd  Merge branch 'master' of https://github.com/gatorsmile/spark (gatorspark, Nov 17, 2015)
6e0018b  Merge remote-tracking branch 'upstream/master' (Nov 20, 2015)
0546772  converge (gatorsmile, Nov 20, 2015)
b37a64f  converge (gatorsmile, Nov 20, 2015)
c2a872c  Merge remote-tracking branch 'upstream/master' (gatorsmile, Jan 6, 2016)
ab6dbd7  Merge remote-tracking branch 'upstream/master' (gatorsmile, Jan 6, 2016)
4276356  Merge remote-tracking branch 'upstream/master' (gatorsmile, Jan 6, 2016)
2dab708  Merge remote-tracking branch 'upstream/master' (gatorsmile, Jan 7, 2016)
0458770  Merge remote-tracking branch 'upstream/master' (gatorsmile, Jan 8, 2016)
1debdfa  Merge remote-tracking branch 'upstream/master' (gatorsmile, Jan 9, 2016)
763706d  Merge remote-tracking branch 'upstream/master' (gatorsmile, Jan 14, 2016)
4de6ec1  Merge remote-tracking branch 'upstream/master' (gatorsmile, Jan 18, 2016)
9422a4f  Merge remote-tracking branch 'upstream/master' (gatorsmile, Jan 19, 2016)
52bdf48  Merge remote-tracking branch 'upstream/master' (gatorsmile, Jan 20, 2016)
1e95df3  Merge remote-tracking branch 'upstream/master' (gatorsmile, Jan 23, 2016)
fab24cf  Merge remote-tracking branch 'upstream/master' (gatorsmile, Feb 1, 2016)
8b2e33b  Merge remote-tracking branch 'upstream/master' (gatorsmile, Feb 5, 2016)
2ee1876  Merge remote-tracking branch 'upstream/master' (gatorsmile, Feb 11, 2016)
b9f0090  Merge remote-tracking branch 'upstream/master' (gatorsmile, Feb 12, 2016)
ade6f7e  Merge remote-tracking branch 'upstream/master' (gatorsmile, Feb 15, 2016)
9fd63d2  Merge remote-tracking branch 'upstream/master' (gatorsmile, Feb 19, 2016)
5199d49  Merge remote-tracking branch 'upstream/master' (gatorsmile, Feb 22, 2016)
404214c  Merge remote-tracking branch 'upstream/master' (gatorsmile, Feb 23, 2016)
c001dd9  Merge remote-tracking branch 'upstream/master' (gatorsmile, Feb 25, 2016)
59daa48  Merge remote-tracking branch 'upstream/master' (gatorsmile, Mar 5, 2016)
41d5f64  Merge remote-tracking branch 'upstream/master' (gatorsmile, Mar 7, 2016)
472a6e3  Merge remote-tracking branch 'upstream/master' (gatorsmile, Mar 10, 2016)
0fba10a  Merge remote-tracking branch 'upstream/master' (gatorsmile, Mar 12, 2016)
cbf73b3  Merge remote-tracking branch 'upstream/master' (gatorsmile, Mar 21, 2016)
c08f561  Merge remote-tracking branch 'upstream/master' (gatorsmile, Mar 22, 2016)
474df88  Merge remote-tracking branch 'upstream/master' (gatorsmile, Mar 22, 2016)
3d9828d  Merge remote-tracking branch 'upstream/master' (gatorsmile, Mar 24, 2016)
72d2361  Merge remote-tracking branch 'upstream/master' (gatorsmile, Mar 26, 2016)
07afea5  Merge remote-tracking branch 'upstream/master' (gatorsmile, Mar 29, 2016)
8bf2007  Merge remote-tracking branch 'upstream/master' (gatorsmile, Mar 30, 2016)
87a165b  Merge remote-tracking branch 'upstream/master' (gatorsmile, Mar 31, 2016)
b9359cd  Merge remote-tracking branch 'upstream/master' (gatorsmile, Apr 1, 2016)
65bd090  Merge remote-tracking branch 'upstream/master' (gatorsmile, Apr 5, 2016)
babf2da  Merge remote-tracking branch 'upstream/master' (gatorsmile, Apr 5, 2016)
9e09469  Merge remote-tracking branch 'upstream/master' (gatorsmile, Apr 6, 2016)
50a8e4a  Merge remote-tracking branch 'upstream/master' (gatorsmile, Apr 6, 2016)
f3337fa  Merge remote-tracking branch 'upstream/master' (gatorsmile, Apr 10, 2016)
09cc36d  Merge remote-tracking branch 'upstream/master' (gatorsmile, Apr 12, 2016)
83a1915  Merge remote-tracking branch 'upstream/master' (gatorsmile, Apr 14, 2016)
0483145  Merge remote-tracking branch 'upstream/master' (gatorsmile, Apr 19, 2016)
236a5f4  Merge remote-tracking branch 'upstream/master' (gatorsmile, Apr 20, 2016)
08aaa4d  Merge remote-tracking branch 'upstream/master' (gatorsmile, Apr 21, 2016)
64f704e  Merge remote-tracking branch 'upstream/master' (gatorsmile, Apr 24, 2016)
006ea2d  Merge remote-tracking branch 'upstream/master' (gatorsmile, Apr 26, 2016)
0c0dc8a  Merge remote-tracking branch 'upstream/master' (gatorsmile, Apr 27, 2016)
ddd0b2e  initial fix. (gatorsmile, Apr 29, 2016)
980b51e  initial fix. (gatorsmile, Apr 29, 2016)
524d5a4  Merge remote-tracking branch 'upstream/master' into banDropMultiPart (gatorsmile, Apr 29, 2016)
b0e19c4  Merge remote-tracking branch 'upstream/master' into banDropMultiPart (gatorsmile, Apr 29, 2016)
6272398  fixed test cases (gatorsmile, Apr 30, 2016)
3b7b5de  address comments (gatorsmile, May 1, 2016)
7c4b2f0  Merge remote-tracking branch 'upstream/master' (gatorsmile, May 1, 2016)
78a868b  Merge branch 'banDropMultiPart' into banDropMultiPartNew (gatorsmile, May 1, 2016)
38f3af9  Merge remote-tracking branch 'upstream/master' (gatorsmile, May 1, 2016)
fa15228  fix tests (gatorsmile, May 1, 2016)
8089c6f  Merge remote-tracking branch 'upstream/master' (gatorsmile, May 4, 2016)
acbbf5c  Merge branch 'banDropMultiPartNew' into banDropMultiPartNewNew (gatorsmile, May 4, 2016)
8a4980c  revert. (gatorsmile, May 4, 2016)
a6c7518  Merge remote-tracking branch 'upstream/master' (gatorsmile, May 4, 2016)
546c1db  Merge remote-tracking branch 'upstream/master' (gatorsmile, May 4, 2016)
e2ece35  Merge remote-tracking branch 'upstream/master' (gatorsmile, May 5, 2016)
13c04be  Merge remote-tracking branch 'upstream/master' (gatorsmile, May 6, 2016)
ac88fc1  Merge remote-tracking branch 'upstream/master' (gatorsmile, May 6, 2016)
154d3df  Merge remote-tracking branch 'upstream/master' (gatorsmile, May 10, 2016)
412e88a  Merge remote-tracking branch 'upstream/master' (gatorsmile, May 10, 2016)
c570065  Merge remote-tracking branch 'upstream/master' (gatorsmile, May 11, 2016)
ac03674  Merge remote-tracking branch 'upstream/master' (gatorsmile, May 11, 2016)
f58b5f7  Merge branch 'banDropMultiPartNewNew' into banDropMultiPartNewNewNew (gatorsmile, May 11, 2016)
3ccd099  address comments. (gatorsmile, May 11, 2016)
8e13da3  address comments. (gatorsmile, May 11, 2016)
15f287f  issue a better error message (gatorsmile, May 11, 2016)
9ca7621  remove useless checks (gatorsmile, May 11, 2016)
@@ -510,6 +510,7 @@ class SessionCatalog(
tableName: TableIdentifier,
parts: Seq[CatalogTablePartition],
ignoreIfExists: Boolean): Unit = {
requireExactMatchedPartitionSpec(parts.map(_.spec), getTableMetadata(tableName))
val db = formatDatabaseName(tableName.database.getOrElse(getCurrentDatabase))
val table = formatTableName(tableName.table)
requireDbExists(db)
@@ -523,13 +524,14 @@
*/
def dropPartitions(
tableName: TableIdentifier,
-      parts: Seq[TablePartitionSpec],
+      specs: Seq[TablePartitionSpec],
ignoreIfNotExists: Boolean): Unit = {
requirePartialMatchedPartitionSpec(specs, getTableMetadata(tableName))
[Contributor] Wait, I thought the intention of this patch was to ban partial matching, which is buggy in Hive? It looks like this just ensures that the user doesn't specify a column that is not a partition column. Is that sufficient to bypass the Hive bug you were talking about?

[Member Author] Yeah, as long as we do not specify an invalid partition spec, Hive returns the correct set of qualified partitions.

[Member Author] Thus, this PR resolves the issue by banning users from specifying an invalid partition spec. Below is the test case:
https://github.com/gatorsmile/spark/blob/9ca76217defe5e0154b1d26a61baf4831d19df3a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala#L189-L194
Without this fix, it would drop all the partitions.

[Contributor] I understand now. The earlier PR description was kind of confusing.

val db = formatDatabaseName(tableName.database.getOrElse(getCurrentDatabase))
val table = formatTableName(tableName.table)
requireDbExists(db)
requireTableExists(TableIdentifier(table, Option(db)))
-    externalCatalog.dropPartitions(db, table, parts, ignoreIfNotExists)
+    externalCatalog.dropPartitions(db, table, specs, ignoreIfNotExists)
}
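To make the review discussion above concrete, here is a minimal sketch of the new dropPartitions behavior. It borrows names from the test suite later in this diff (newBasicCatalog() builds a catalog whose table `db2`.`tbl2` is partitioned by (a, b); intercept comes from ScalaTest); treat it as an illustration under those assumptions, not code from the PR.

import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.catalog.SessionCatalog

val catalog = new SessionCatalog(newBasicCatalog())

// A partial spec over genuine partition columns passes the new check;
// against a Hive-backed catalog it would drop every partition with a = 1.
// (The in-memory test catalog does not implement partial drops, as noted
// in a review thread below.)
catalog.dropPartitions(
  TableIdentifier("tbl2", Some("db2")),
  Seq(Map("a" -> "1")),
  ignoreIfNotExists = true)

// A spec naming a column outside (a, b) is now rejected before it ever
// reaches the external catalog; previously Hive could match, and drop,
// every partition for such a spec.
intercept[AnalysisException] {
  catalog.dropPartitions(
    TableIdentifier("tbl2", Some("db2")),
    Seq(Map("unknown" -> "1")),
    ignoreIfNotExists = true)
}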

/**
@@ -542,6 +544,9 @@
tableName: TableIdentifier,
specs: Seq[TablePartitionSpec],
newSpecs: Seq[TablePartitionSpec]): Unit = {
val tableMetadata = getTableMetadata(tableName)
requireExactMatchedPartitionSpec(specs, tableMetadata)
requireExactMatchedPartitionSpec(newSpecs, tableMetadata)
val db = formatDatabaseName(tableName.database.getOrElse(getCurrentDatabase))
val table = formatTableName(tableName.table)
requireDbExists(db)
@@ -559,6 +564,7 @@
* this becomes a no-op.
*/
def alterPartitions(tableName: TableIdentifier, parts: Seq[CatalogTablePartition]): Unit = {
requireExactMatchedPartitionSpec(parts.map(_.spec), getTableMetadata(tableName))
val db = formatDatabaseName(tableName.database.getOrElse(getCurrentDatabase))
val table = formatTableName(tableName.table)
requireDbExists(db)
@@ -571,6 +577,7 @@
* If no database is specified, assume the table is in the current database.
*/
def getPartition(tableName: TableIdentifier, spec: TablePartitionSpec): CatalogTablePartition = {
requireExactMatchedPartitionSpec(Seq(spec), getTableMetadata(tableName))
val db = formatDatabaseName(tableName.database.getOrElse(getCurrentDatabase))
val table = formatTableName(tableName.table)
requireDbExists(db)
@@ -595,6 +602,42 @@
externalCatalog.listPartitions(db, table, partialSpec)
}

/**
* Verify if the input partition specs exactly match the partition spec defined in the table.
* The columns must be the same, but the order can be different.
*/

[Contributor] Is this consistent with Hive? Sorry, I don't remember it clearly, but doesn't the partition order sometimes matter in Hive?

[Member Author] Yeah, Hive supports it. To external users, the order does not matter; this is just for usability, I think.
[Contributor @andrewor14, May 3, 2016] I think the interface here could be better, maybe something like:

private def requirePartitionsDefinedInTable(
    specs: Seq[TablePartitionSpec],
    table: CatalogTable): Unit = {
  ...
}

This would allow you to reuse more code.

[Member Author] Sure, will do it.

[Member Author] The latest code changes incorporate this great idea. Thanks!

private def requireExactMatchedPartitionSpec(
    specs: Seq[TablePartitionSpec],
table: CatalogTable): Unit = {
val defined = table.partitionColumnNames.sorted
specs.foreach { s =>
if (s.keys.toSeq.sorted != defined) {
throw new AnalysisException(
s"Partition spec is invalid. The spec (${s.keys.mkString(", ")}) must match " +
s"the partition spec (${table.partitionColumnNames.mkString(", ")}) defined in " +
s"table '${table.identifier}'")
}
}
}
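For intuition, the exact-match check boils down to comparing sorted key sequences, so the key order in the user's spec is irrelevant. A standalone sketch, with hypothetical literal values standing in for a real CatalogTable:

// Table partitioned by (a, b):
val defined = Seq("a", "b").sorted

// Mixed key order is fine: only the sorted column sets are compared.
assert(Map("b" -> "2", "a" -> "1").keys.toSeq.sorted == defined)

// Too few, too many, or unknown columns all fail the same comparison.
assert(Map("a" -> "1").keys.toSeq.sorted != defined)
assert(Map("a" -> "1", "b" -> "2", "c" -> "3").keys.toSeq.sorted != defined)
assert(Map("a" -> "1", "unknown" -> "2").keys.toSeq.sorted != defined)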

/**
* Verify if the input partition specs partially match the partition spec defined in the table.
* That is, the columns of each partition spec must be a subset of the defined partition columns.
*/
private def requirePartialMatchedPartitionSpec(
specs: Seq[TablePartitionSpec],
table: CatalogTable): Unit = {
val defined = table.partitionColumnNames
specs.foreach { s =>
if (!s.keys.forall(defined.contains)) {
throw new AnalysisException(
s"Partition spec is invalid. The spec (${s.keys.mkString(", ")}) must be contained " +
s"within the partition spec (${table.partitionColumnNames.mkString(", ")}) defined " +
s"in table '${table.identifier}'")
}
}
}
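The partial-match rule used for drops is just a subset test over column names; another standalone sketch, with the same hypothetical (a, b) table:

val definedCols = Seq("a", "b")

// Any subset of the partition columns is an acceptable drop spec...
assert(Map("a" -> "1").keys.forall(definedCols.contains))

// ...but a spec mentioning a column outside (a, b) is rejected.
assert(!Map("a" -> "1", "c" -> "3").keys.forall(definedCols.contains))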

// ----------------------------------------------------------------------------
// Functions
// ----------------------------------------------------------------------------
@@ -627,6 +627,12 @@ abstract class CatalogTestUtils {
lazy val part1 = CatalogTablePartition(Map("a" -> "1", "b" -> "2"), storageFormat)
lazy val part2 = CatalogTablePartition(Map("a" -> "3", "b" -> "4"), storageFormat)
lazy val part3 = CatalogTablePartition(Map("a" -> "5", "b" -> "6"), storageFormat)
lazy val partWithMixedOrder = CatalogTablePartition(Map("b" -> "6", "a" -> "6"), storageFormat)
lazy val partWithLessColumns = CatalogTablePartition(Map("a" -> "1"), storageFormat)
lazy val partWithMoreColumns =
CatalogTablePartition(Map("a" -> "5", "b" -> "6", "c" -> "7"), storageFormat)
lazy val partWithUnknownColumns =
CatalogTablePartition(Map("a" -> "5", "unknown" -> "6"), storageFormat)
lazy val funcClass = "org.apache.spark.myFunc"

/**
@@ -482,8 +482,10 @@ class SessionCatalogSuite extends SparkFunSuite {
assert(catalogPartitionsEqual(externalCatalog, "mydb", "tbl", Seq(part1, part2)))
// Create partitions without explicitly specifying database
sessionCatalog.setCurrentDatabase("mydb")
-    sessionCatalog.createPartitions(TableIdentifier("tbl"), Seq(part3), ignoreIfExists = false)
-    assert(catalogPartitionsEqual(externalCatalog, "mydb", "tbl", Seq(part1, part2, part3)))
+    sessionCatalog.createPartitions(
+      TableIdentifier("tbl"), Seq(partWithMixedOrder), ignoreIfExists = false)
+    assert(catalogPartitionsEqual(
+      externalCatalog, "mydb", "tbl", Seq(part1, part2, partWithMixedOrder)))
}

test("create partitions when database/table does not exist") {
@@ -508,6 +510,31 @@
TableIdentifier("tbl2", Some("db2")), Seq(part1), ignoreIfExists = true)
}

test("create partitions with invalid part spec") {
val catalog = new SessionCatalog(newBasicCatalog())
var e = intercept[AnalysisException] {
catalog.createPartitions(
TableIdentifier("tbl2", Some("db2")),
Seq(part1, partWithLessColumns), ignoreIfExists = false)
}
assert(e.getMessage.contains("Partition spec is invalid. The spec (a) must match " +
"the partition spec (a, b) defined in table '`db2`.`tbl2`'"))
e = intercept[AnalysisException] {
catalog.createPartitions(
TableIdentifier("tbl2", Some("db2")),
Seq(part1, partWithMoreColumns), ignoreIfExists = true)
}
assert(e.getMessage.contains("Partition spec is invalid. The spec (a, b, c) must match " +
"the partition spec (a, b) defined in table '`db2`.`tbl2`'"))
e = intercept[AnalysisException] {
catalog.createPartitions(
TableIdentifier("tbl2", Some("db2")),
Seq(partWithUnknownColumns, part1), ignoreIfExists = true)
}
assert(e.getMessage.contains("Partition spec is invalid. The spec (a, unknown) must match " +
"the partition spec (a, b) defined in table '`db2`.`tbl2`'"))
}

test("drop partitions") {
val externalCatalog = newBasicCatalog()
val sessionCatalog = new SessionCatalog(externalCatalog)
@@ -565,6 +592,28 @@
ignoreIfNotExists = true)
}

test("drop partitions with invalid partition spec") {
[Contributor] Should we test drop partitions with partWithLessColumns?

[Member Author] Since the InMemoryCatalog does not support partial-spec drops, that test would fail; thus, I did not add it.

val catalog = new SessionCatalog(newBasicCatalog())
var e = intercept[AnalysisException] {
catalog.dropPartitions(
TableIdentifier("tbl2", Some("db2")),
Seq(partWithMoreColumns.spec),
ignoreIfNotExists = false)
}
assert(e.getMessage.contains(
"Partition spec is invalid. The spec (a, b, c) must be contained within " +
"the partition spec (a, b) defined in table '`db2`.`tbl2`'"))
e = intercept[AnalysisException] {
catalog.dropPartitions(
TableIdentifier("tbl2", Some("db2")),
Seq(partWithUnknownColumns.spec),
ignoreIfNotExists = false)
}
assert(e.getMessage.contains(
"Partition spec is invalid. The spec (a, unknown) must be contained within " +
"the partition spec (a, b) defined in table '`db2`.`tbl2`'"))
}

test("get partition") {
val catalog = new SessionCatalog(newBasicCatalog())
assert(catalog.getPartition(
@@ -591,6 +640,25 @@
}
}

test("get partition with invalid partition spec") {
val catalog = new SessionCatalog(newBasicCatalog())
var e = intercept[AnalysisException] {
catalog.getPartition(TableIdentifier("tbl1", Some("db2")), partWithLessColumns.spec)
}
assert(e.getMessage.contains("Partition spec is invalid. The spec (a) must match " +
"the partition spec (a, b) defined in table '`db2`.`tbl1`'"))
e = intercept[AnalysisException] {
catalog.getPartition(TableIdentifier("tbl1", Some("db2")), partWithMoreColumns.spec)
}
assert(e.getMessage.contains("Partition spec is invalid. The spec (a, b, c) must match " +
"the partition spec (a, b) defined in table '`db2`.`tbl1`'"))
e = intercept[AnalysisException] {
catalog.getPartition(TableIdentifier("tbl1", Some("db2")), partWithUnknownColumns.spec)
}
assert(e.getMessage.contains("Partition spec is invalid. The spec (a, unknown) must match " +
"the partition spec (a, b) defined in table '`db2`.`tbl1`'"))
}

test("rename partitions") {
val catalog = new SessionCatalog(newBasicCatalog())
val newPart1 = part1.copy(spec = Map("a" -> "100", "b" -> "101"))
@@ -633,6 +701,31 @@
}
}

test("rename partition with invalid partition spec") {
val catalog = new SessionCatalog(newBasicCatalog())
var e = intercept[AnalysisException] {
catalog.renamePartitions(
TableIdentifier("tbl1", Some("db2")),
Seq(part1.spec), Seq(partWithLessColumns.spec))
}
assert(e.getMessage.contains("Partition spec is invalid. The spec (a) must match " +
"the partition spec (a, b) defined in table '`db2`.`tbl1`'"))
e = intercept[AnalysisException] {
catalog.renamePartitions(
TableIdentifier("tbl1", Some("db2")),
Seq(part1.spec), Seq(partWithMoreColumns.spec))
}
assert(e.getMessage.contains("Partition spec is invalid. The spec (a, b, c) must match " +
"the partition spec (a, b) defined in table '`db2`.`tbl1`'"))
e = intercept[AnalysisException] {
catalog.renamePartitions(
TableIdentifier("tbl1", Some("db2")),
Seq(part1.spec), Seq(partWithUnknownColumns.spec))
}
assert(e.getMessage.contains("Partition spec is invalid. The spec (a, unknown) must match " +
"the partition spec (a, b) defined in table '`db2`.`tbl1`'"))
}

test("alter partitions") {
val catalog = new SessionCatalog(newBasicCatalog())
val newLocation = newUriForDatabase()
@@ -673,6 +766,25 @@
}
}

test("alter partition with invalid partition spec") {
val catalog = new SessionCatalog(newBasicCatalog())
var e = intercept[AnalysisException] {
catalog.alterPartitions(TableIdentifier("tbl1", Some("db2")), Seq(partWithLessColumns))
}
assert(e.getMessage.contains("Partition spec is invalid. The spec (a) must match " +
"the partition spec (a, b) defined in table '`db2`.`tbl1`'"))
e = intercept[AnalysisException] {
catalog.alterPartitions(TableIdentifier("tbl1", Some("db2")), Seq(partWithMoreColumns))
}
assert(e.getMessage.contains("Partition spec is invalid. The spec (a, b, c) must match " +
"the partition spec (a, b) defined in table '`db2`.`tbl1`'"))
e = intercept[AnalysisException] {
catalog.alterPartitions(TableIdentifier("tbl1", Some("db2")), Seq(partWithUnknownColumns))
}
assert(e.getMessage.contains("Partition spec is invalid. The spec (a, unknown) must match " +
"the partition spec (a, b) defined in table '`db2`.`tbl1`'"))
}

test("list partitions") {
val catalog = new SessionCatalog(newBasicCatalog())
assert(catalog.listPartitions(TableIdentifier("tbl2", Some("db2"))).toSet == Set(part1, part2))