Skip to content

Commit

Permalink
[SPARK-12624][PYSPARK] Checks row length when converting Java arrays …
Browse files Browse the repository at this point in the history
…to Python rows

When actual row length doesn't conform to specified schema field length, we should give a better error message instead of throwing an unintuitive `ArrayOutOfBoundsException`.

Author: Cheng Lian <lian@databricks.com>

Closes #10886 from liancheng/spark-12624.

(cherry picked from commit 3327fd2)
Signed-off-by: Yin Huai <yhuai@databricks.com>
  • Loading branch information
liancheng authored and yhuai committed Jan 25, 2016
1 parent f913f7e commit 88614dd
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 1 deletion.
9 changes: 9 additions & 0 deletions python/pyspark/sql/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -364,6 +364,15 @@ def test_infer_schema_to_local(self):
df3 = self.sqlCtx.createDataFrame(rdd, df.schema)
self.assertEqual(10, df3.count())

def test_create_dataframe_schema_mismatch(self):
input = [Row(a=1)]
rdd = self.sc.parallelize(range(3)).map(lambda i: Row(a=i))
schema = StructType([StructField("a", IntegerType()), StructField("b", StringType())])
df = self.sqlCtx.createDataFrame(rdd, schema)
message = ".*Input row doesn't have expected number of values required by the schema.*"
with self.assertRaisesRegexp(Exception, message):
df.show()

def test_serialize_nested_array_and_map(self):
d = [Row(l=[Row(a=1, b='s')], d={"key": Row(c=1.0, d="2")})]
rdd = self.sc.parallelize(d)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,14 @@ object EvaluatePython {
ArrayBasedMapData(keys, values)

case (c, StructType(fields)) if c.getClass.isArray =>
new GenericInternalRow(c.asInstanceOf[Array[_]].zip(fields).map {
val array = c.asInstanceOf[Array[_]]
if (array.length != fields.length) {
throw new IllegalStateException(
s"Input row doesn't have expected number of values required by the schema. " +
s"${fields.length} fields are required while ${array.length} values are provided."
)
}
new GenericInternalRow(array.zip(fields).map {
case (e, f) => fromJava(e, f.dataType)
})

Expand Down

0 comments on commit 88614dd

Please sign in to comment.