Skip to content

Commit

Permalink
Add columnSchema method to PySpark image reader
Browse files Browse the repository at this point in the history
  • Loading branch information
HyukjinKwon committed Feb 1, 2018
1 parent e15da5b commit e180ade
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 1 deletion.
20 changes: 19 additions & 1 deletion python/pyspark/ml/image.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ class _ImageSchema(object):
def __init__(self):
self._imageSchema = None
self._ocvTypes = None
self._columnSchema = None
self._imageFields = None
self._undefinedImageType = None

Expand All @@ -49,7 +50,7 @@ def imageSchema(self):
Returns the image schema.
:return: a :class:`StructType` with a single column of images
named "image" (nullable).
named "image" (nullable) and having the same type returned by :meth:`columnSchema`.
.. versionadded:: 2.3.0
"""
Expand All @@ -75,6 +76,23 @@ def ocvTypes(self):
self._ocvTypes = dict(ctx._jvm.org.apache.spark.ml.image.ImageSchema.javaOcvTypes())
return self._ocvTypes

@property
def columnSchema(self):
    """
    Returns the schema of the image column itself.

    The value is fetched from the JVM-side ``ImageSchema.columnSchema()`` on
    first access, parsed from its JSON form, and then cached on this instance
    so later accesses do not go through the JVM again.

    :return: a :class:`StructType` for image column,
        ``struct<origin:string, height:int, width:int, nChannels:int, mode:int, data:binary>``.

    .. versionadded:: 2.3.0
    """
    # Fast path: the schema has already been resolved and cached.
    if self._columnSchema is not None:
        return self._columnSchema

    # Slow path: ask the JVM for the schema and parse its JSON representation.
    active_context = SparkContext._active_spark_context
    java_image_schema = active_context._jvm.org.apache.spark.ml.image.ImageSchema
    schema_json = java_image_schema.columnSchema().json()
    self._columnSchema = _parse_datatype_json_string(schema_json)
    return self._columnSchema

@property
def imageFields(self):
"""
Expand Down
1 change: 1 addition & 0 deletions python/pyspark/ml/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -1852,6 +1852,7 @@ def test_read_images(self):
self.assertEqual(len(array), first_row[1])
self.assertEqual(ImageSchema.toImage(array, origin=first_row[0]), first_row)
self.assertEqual(df.schema, ImageSchema.imageSchema)
self.assertEqual(df.schema["image"].dataType, ImageSchema.columnSchema)
expected = {'CV_8UC3': 16, 'Undefined': -1, 'CV_8U': 0, 'CV_8UC1': 0, 'CV_8UC4': 24}
self.assertEqual(ImageSchema.ocvTypes, expected)
expected = ['origin', 'height', 'width', 'nChannels', 'mode', 'data']
Expand Down

0 comments on commit e180ade

Please sign in to comment.