Skip to content
Permalink
Browse files

[SPARK-29444][FOLLOWUP] add doc and python parameter for ignoreNullFi…

…elds in json generating

# What changes were proposed in this pull request?
Add description for ignoreNullFields, which is commited in #26098 , in DataFrameWriter and readwriter.py.
Enable user to use ignoreNullFields in pyspark.

### Does this PR introduce any user-facing change?
No

### How was this patch tested?
run unit tests

Closes #26227 from stczwd/json-generator-doc.

Authored-by: stczwd <qcsd2011@163.com>
Signed-off-by: Dongjoon Hyun <dhyun@apple.com>
  • Loading branch information
stczwd authored and dongjoon-hyun committed Oct 24, 2019
1 parent cdea520 commit dcf5eaf1a6c0330a9460e168c1c3fee21998ba65
@@ -788,7 +788,7 @@ def saveAsTable(self, name, format=None, mode=None, partitionBy=None, **options)

@since(1.4)
def json(self, path, mode=None, compression=None, dateFormat=None, timestampFormat=None,
lineSep=None, encoding=None):
lineSep=None, encoding=None, ignoreNullFields=None):
"""Saves the content of the :class:`DataFrame` in JSON format
(`JSON Lines text format or newline-delimited JSON <http://jsonlines.org/>`_) at the
specified path.
@@ -817,13 +817,15 @@ def json(self, path, mode=None, compression=None, dateFormat=None, timestampForm
the default UTF-8 charset will be used.
:param lineSep: defines the line separator that should be used for writing. If None is
set, it uses the default value, ``\\n``.
:param ignoreNullFields: Whether to ignore null fields when generating JSON objects.
If None is set, it uses the default value, ``true``.
>>> df.write.json(os.path.join(tempfile.mkdtemp(), 'data'))
"""
self.mode(mode)
self._set_opts(
compression=compression, dateFormat=dateFormat, timestampFormat=timestampFormat,
lineSep=lineSep, encoding=encoding)
lineSep=lineSep, encoding=encoding, ignoreNullFields=ignoreNullFields)
self._jwrite.json(path)

@since(1.4)
@@ -78,8 +78,8 @@ private[sql] class JSONOptions(
val dropFieldIfAllNull = parameters.get("dropFieldIfAllNull").map(_.toBoolean).getOrElse(false)

// Whether to ignore null fields during json generating
val ignoreNullFields = parameters.getOrElse("ignoreNullFields",
SQLConf.get.jsonGeneratorIgnoreNullFields).toBoolean
val ignoreNullFields = parameters.get("ignoreNullFields").map(_.toBoolean)
.getOrElse(SQLConf.get.jsonGeneratorIgnoreNullFields)

// A language tag in IETF BCP 47 format
val locale: Locale = parameters.get("locale").map(Locale.forLanguageTag).getOrElse(Locale.US)
@@ -1196,9 +1196,11 @@ object SQLConf {

val JSON_GENERATOR_IGNORE_NULL_FIELDS =
buildConf("spark.sql.jsonGenerator.ignoreNullFields")
.doc("If false, JacksonGenerator will generate null for null fields in Struct.")
.stringConf
.createWithDefault("true")
.doc("Whether to ignore null fields when generating JSON objects in JSON data source and " +
"JSON functions such as to_json. " +
"If false, it generates null for null fields in JSON objects.")
.booleanConf
.createWithDefault(true)

val FILE_SINK_LOG_DELETION = buildConf("spark.sql.streaming.fileSink.log.deletion")
.internal()
@@ -2392,7 +2394,7 @@ class SQLConf extends Serializable with Logging {

def sessionLocalTimeZone: String = getConf(SQLConf.SESSION_LOCAL_TIMEZONE)

def jsonGeneratorIgnoreNullFields: String = getConf(SQLConf.JSON_GENERATOR_IGNORE_NULL_FIELDS)
def jsonGeneratorIgnoreNullFields: Boolean = getConf(SQLConf.JSON_GENERATOR_IGNORE_NULL_FIELDS)

def parallelFileListingInStatsComputation: Boolean =
getConf(SQLConf.PARALLEL_FILE_LISTING_IN_STATS_COMPUTATION)
@@ -687,6 +687,8 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) {
* <li>`encoding` (by default it is not set): specifies encoding (charset) of saved json
* files. If it is not set, the UTF-8 charset will be used. </li>
* <li>`lineSep` (default `\n`): defines the line separator that should be used for writing.</li>
* <li>`ignoreNullFields` (default `true`): Whether to ignore null fields
* when generating JSON objects. </li>
* </ul>
*
* @since 1.4.0

0 comments on commit dcf5eaf

Please sign in to comment.
You can’t perform that action at this time.