Skip to content

Commit

Permalink
[SPARK-21330][SQL] Bad partitioning does not allow to read a JDBC tab…
Browse files Browse the repository at this point in the history
…le with extreme values on the partition column

## What changes were proposed in this pull request?

An overflow of the difference of bounds on the partitioning column leads to no data being read. This
patch checks for this overflow.

## How was this patch tested?

New unit test.

Author: Andrew Ray <ray.andrew@gmail.com>

Closes #18800 from aray/SPARK-21330.

(cherry picked from commit 25826c7)
Signed-off-by: Sean Owen <sowen@cloudera.com>
  • Loading branch information
aray authored and srowen committed Aug 4, 2017
1 parent d93e45b commit 734b144
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,8 @@ private[sql] object JDBCRelation extends Logging {
s"bound. Lower bound: $lowerBound; Upper bound: $upperBound")

val numPartitions =
if ((upperBound - lowerBound) >= partitioning.numPartitions) {
if ((upperBound - lowerBound) >= partitioning.numPartitions || /* check for overflow */
(upperBound - lowerBound) < 0) {
partitioning.numPartitions
} else {
logWarning("The number of partitions is reduced because the specified number of " +
Expand Down
15 changes: 15 additions & 0 deletions sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,15 @@ class JDBCSuite extends SparkFunSuite
| partitionColumn 'THEID', lowerBound '1', upperBound '4', numPartitions '3')
""".stripMargin.replaceAll("\n", " "))

sql(
s"""
|CREATE OR REPLACE TEMPORARY VIEW partsoverflow
|USING org.apache.spark.sql.jdbc
|OPTIONS (url '$url', dbtable 'TEST.PEOPLE', user 'testUser', password 'testPass',
| partitionColumn 'THEID', lowerBound '-9223372036854775808',
| upperBound '9223372036854775807', numPartitions '3')
""".stripMargin.replaceAll("\n", " "))

conn.prepareStatement("create table test.inttypes (a INT, b BOOLEAN, c TINYINT, "
+ "d SMALLINT, e BIGINT)").executeUpdate()
conn.prepareStatement("insert into test.inttypes values (1, false, 3, 4, 1234567890123)"
Expand Down Expand Up @@ -366,6 +375,12 @@ class JDBCSuite extends SparkFunSuite
assert(ids(2) === 3)
}

test("overflow of partition bound difference does not give negative stride") {
val df = sql("SELECT * FROM partsoverflow")
checkNumPartitions(df, expectedNumPartitions = 3)
assert(df.collect().length == 3)
}

test("Register JDBC query with renamed fields") {
// Regression test for bug SPARK-7345
sql(
Expand Down

0 comments on commit 734b144

Please sign in to comment.