Skip to content

Commit

Permalink
Mask other forms of password arguments in SparkSubmitOperator (#9615)
Browse files Browse the repository at this point in the history
This is a follow-up to #6917 before modifying the masking code.
Related: #9595.
  • Loading branch information
Unit03 committed Jul 11, 2020
1 parent 3cc5756 commit 0873070
Show file tree
Hide file tree
Showing 2 changed files with 65 additions and 2 deletions.
19 changes: 17 additions & 2 deletions airflow/providers/apache/spark/hooks/spark_submit.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,8 +239,23 @@ def _mask_cmd(self, connection_cmd):
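# Mask the value of any application argument whose key contains "secret" or
# "password" (case insensitive). The key and the value may be separated by
# "=" or by whitespace, and the value may be unquoted or wrapped in single or
# double quotes, e.g. HivePassword='abc', --password "abc" or --secret=abc.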
connection_cmd_masked = re.sub(
r"(\S*?(?:secret|password)\S*?\s*=\s*')[^']*(?=')",
r'\1******', ' '.join(connection_cmd), flags=re.I)
r"("
r"\S*?" # Match all non-whitespace characters before...
r"(?:secret|password)" # ...literally a "secret" or "password"
# word (not capturing them).
r"\S*?" # All non-whitespace characters before either...
r"(?:=|\s+)" # ...an equal sign or whitespace characters
# (not capturing them).
r"(['\"]?)" # An optional single or double quote.
r")" # This is the end of the first capturing group.
r"(?:(?!\2\s).)*" # All characters between optional quotes
# (matched above); if the value is quoted,
# it may contain whitespace.
r"(\2)", # Optional matching quote.
r'\1******\3',
' '.join(connection_cmd),
flags=re.I,
)

return connection_cmd_masked

Expand Down
48 changes: 48 additions & 0 deletions tests/providers/apache/spark/hooks/test_spark_submit.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
import unittest
from unittest.mock import call, patch

from parameterized import parameterized

from airflow.exceptions import AirflowException
from airflow.models import Connection
from airflow.providers.apache.spark.hooks.spark_submit import SparkSubmitHook
Expand Down Expand Up @@ -767,3 +769,49 @@ def test_k8s_process_on_kill(self, mock_popen, mock_client_method):
client.delete_namespaced_pod.assert_called_once_with(
'spark-pi-edf2ace37be7353a958b38733a12f8e6-driver',
'mynamespace', **kwargs)

@parameterized.expand(
    (
        # Single-quoted value followed by further arguments.
        (
            ("spark-submit", "foo", "--bar", "baz", "--password='secret'", "--foo", "bar"),
            "spark-submit foo --bar baz --password='******' --foo bar",
        ),
        # Single-quoted value as the last argument.
        (
            ("spark-submit", "foo", "--bar", "baz", "--password='secret'"),
            "spark-submit foo --bar baz --password='******'",
        ),
        # Double-quoted value.
        (
            ("spark-submit", "foo", "--bar", "baz", '--password="secret"'),
            'spark-submit foo --bar baz --password="******"',
        ),
        # Unquoted value.
        (
            ("spark-submit", "foo", "--bar", "baz", '--password=secret'),
            'spark-submit foo --bar baz --password=******',
        ),
        # Key and value separated by whitespace instead of an equal sign.
        (
            ("spark-submit", "foo", "--bar", "baz", "--password 'secret'"),
            "spark-submit foo --bar baz --password '******'",
        ),
        # Single-quoted value containing a double quote.
        (
            ("spark-submit", "foo", "--bar", "baz", "--password='sec\"ret'"),
            "spark-submit foo --bar baz --password='******'",
        ),
        # Double-quoted value containing a single quote.
        (
            ("spark-submit", "foo", "--bar", "baz", '--password="sec\'ret"'),
            'spark-submit foo --bar baz --password="******"',
        ),
        # Nothing to mask: the command must come back unchanged.
        (
            ("spark-submit",),
            "spark-submit",
        ),
    )
)
def test_masks_passwords(self, command: tuple, expected: str) -> None:
    """Check that ``_mask_cmd`` hides password values in a command line.

    :param command: the command as a tuple of string arguments; each
        parameterized case passes a tuple, not a single string
    :param expected: the space-joined command with every password value
        replaced by ``******`` and any surrounding quotes preserved
    """
    # Given
    hook = SparkSubmitHook()

    # When
    command_masked = hook._mask_cmd(command)

    # Then
    assert command_masked == expected

0 comments on commit 0873070

Please sign in to comment.