Skip to content

Commit

Permalink
[SPARK-35576][SQL] Redact the sensitive info in the result of Set com…
Browse files Browse the repository at this point in the history
…mand

### What changes were proposed in this pull request?

Currently, the results of the following SQL queries are not redacted:
```
SET [KEY];
SET;
```
For example:

```
scala> spark.sql("set javax.jdo.option.ConnectionPassword=123456").show()
+--------------------+------+
|                 key| value|
+--------------------+------+
|javax.jdo.option....|123456|
+--------------------+------+

scala> spark.sql("set javax.jdo.option.ConnectionPassword").show()
+--------------------+------+
|                 key| value|
+--------------------+------+
|javax.jdo.option....|123456|
+--------------------+------+

scala> spark.sql("set").show()
+--------------------+--------------------+
|                 key|               value|
+--------------------+--------------------+
|javax.jdo.option....|              123456|

```

We should hide the sensitive information and redact the query output.

### Why are the changes needed?

Security.

### Does this PR introduce _any_ user-facing change?

Yes, the sensitive information in the output of SET commands is now redacted.

### How was this patch tested?

Unit test

Closes #32712 from gengliangwang/redactSet.

Authored-by: Gengliang Wang <gengliang@apache.org>
Signed-off-by: Dongjoon Hyun <dhyun@apple.com>
  • Loading branch information
gengliangwang authored and dongjoon-hyun committed May 31, 2021
1 parent cd2ef9c commit 8e11f5f
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 3 deletions.
Expand Up @@ -4051,11 +4051,18 @@ class SQLConf extends Serializable with Logging {
* Redacts the given option map according to the description of SQL_OPTIONS_REDACTION_PATTERN.
*/
def redactOptions[K, V](options: Map[K, V]): Map[K, V] = {
  // Delegate to the Seq-based overload, then rebuild a Map from the redacted pairs.
  val redactedPairs = redactOptions(options.toSeq)
  redactedPairs.toMap
}

/**
* Redacts the given sequence of key-value options according to the description of SQL_OPTIONS_REDACTION_PATTERN.
*/
def redactOptions[K, V](options: Seq[(K, V)]): Seq[(K, V)] = {
  // Two patterns are applied: the configured SQL_OPTIONS_REDACTION_PATTERN and
  // SECRET_REDACTION_PATTERN as read from `reader`.
  val regexes = Seq(
    getConf(SQL_OPTIONS_REDACTION_PATTERN),
    SECRET_REDACTION_PATTERN.readFrom(reader))

  // Apply the patterns one after another, feeding each pass the output of the previous one.
  // The result stays a Seq (no `.toMap`), so this overload returns the same collection type
  // it was given; the Map overload is responsible for converting back to a Map.
  regexes.foldLeft(options) { case (opts, r) => Utils.redact(Some(r), opts) }
}

/**
Expand Down
Expand Up @@ -107,7 +107,8 @@ case class SetCommand(kv: Option[(String, Option[String])])
// Queries all key-value pairs that are set in the SQLConf of the sparkSession.
case None =>
val runFunc = (sparkSession: SparkSession) => {
sparkSession.conf.getAll.toSeq.sorted.map { case (k, v) => Row(k, v) }
val redactedConf = SQLConf.get.redactOptions(sparkSession.conf.getAll)
redactedConf.toSeq.sorted.map { case (k, v) => Row(k, v) }
}
(keyValueOutput, runFunc)

Expand Down Expand Up @@ -162,7 +163,8 @@ case class SetCommand(kv: Option[(String, Option[String])])
// very likely to change them based on the default value they see.
sparkSession.sharedState.hadoopConf.get(key, "<undefined>")
}
Seq(Row(key, value))
val (_, redactedValue) = SQLConf.get.redactOptions(Seq((key, value))).head
Seq(Row(key, redactedValue))
}
(keyValueOutput, runFunc)
}
Expand Down
13 changes: 13 additions & 0 deletions sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
Expand Up @@ -1084,6 +1084,19 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark
checkAnswer(sql("SET io.file.buffer.size"), Row("io.file.buffer.size", "65536"))
}

test("SPARK-35576: Set command should redact sensitive data") {
  // Keys whose names look sensitive, paired with the values that must never be shown.
  val sensitiveConfs = Seq("test.password" -> "test.value1", "test.token" -> "test.value2")
  withSQLConf(sensitiveConfs: _*) {
    // `SET <key>` must show the redaction placeholder instead of the real value.
    sensitiveConfs.foreach { case (key, _) =>
      checkAnswer(sql(s"SET $key"), Row(key, "*********(redacted)"))
    }
    // A bare `SET` lists every conf; none of the listed values may contain a secret.
    val secrets = sensitiveConfs.map(_._2)
    val shownValues = sql("SET").collect().map(_.getString(1))
    assert(shownValues.forall(shown => !secrets.exists(secret => shown.contains(secret))))
  }
}

test("apply schema") {
withTempView("applySchema1", "applySchema2", "applySchema3") {
val schema1 = StructType(
Expand Down

0 comments on commit 8e11f5f

Please sign in to comment.