From 15d57f9c23145ace37d1631d8f9c19675c142214 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Thu, 3 Mar 2016 20:16:37 -0800 Subject: [PATCH] [SPARK-13647] [SQL] also check if numeric value is within allowed range in _verify_type ## What changes were proposed in this pull request? This PR makes `_verify_type` in `types.py` stricter: it now also checks whether a numeric value is within the allowed range of its data type. ## How was this patch tested? Tested with newly added doctests. Author: Wenchen Fan Closes #11492 from cloud-fan/py-verify. --- python/pyspark/sql/types.py | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py index 5bc0773fa8660..d1f5b47242684 100644 --- a/python/pyspark/sql/types.py +++ b/python/pyspark/sql/types.py @@ -1093,8 +1093,11 @@ def _infer_schema_type(obj, dataType): def _verify_type(obj, dataType): """ - Verify the type of obj against dataType, raise an exception if - they do not match. + Verify the type of obj against dataType, raise a TypeError if they do not match. + + Also verify the value of obj against datatype, raise a ValueError if it's not within the allowed + range, e.g. using 128 as ByteType will overflow. Note that, Python float is not checked, so it + will become infinity when cast to Java float if it overflows. >>> _verify_type(None, StructType([])) >>> _verify_type("", StringType()) @@ -1111,6 +1114,12 @@ def _verify_type(obj, dataType): Traceback (most recent call last): ... ValueError:... + >>> # Check if numeric values are within the allowed range. + >>> _verify_type(12, ByteType()) + >>> _verify_type(1234, ByteType()) # doctest: +IGNORE_EXCEPTION_DETAIL + Traceback (most recent call last): + ... + ValueError:... 
""" # all objects are nullable if obj is None: @@ -1137,7 +1146,19 @@ def _verify_type(obj, dataType): if type(obj) not in _acceptable_types[_type]: raise TypeError("%s can not accept object %r in type %s" % (dataType, obj, type(obj))) - if isinstance(dataType, ArrayType): + if isinstance(dataType, ByteType): + if obj < -128 or obj > 127: + raise ValueError("object of ByteType out of range, got: %s" % obj) + + elif isinstance(dataType, ShortType): + if obj < -32768 or obj > 32767: + raise ValueError("object of ShortType out of range, got: %s" % obj) + + elif isinstance(dataType, IntegerType): + if obj < -2147483648 or obj > 2147483647: + raise ValueError("object of IntegerType out of range, got: %s" % obj) + + elif isinstance(dataType, ArrayType): for i in obj: _verify_type(i, dataType.elementType)