Skip to content

Commit

Permalink
fix: remove query text from exception message, use `exception.debug_m…
Browse files Browse the repository at this point in the history
…essage` instead (#1105)

Since query text can potentially contain sensitive information, remove it from
the default exception message. This information is useful for debugging, so
provide a `debug_message` attribute, which is not included in the exception
representation (and thus the logs).

Fixes internal issue 211616590
  • Loading branch information
tswast committed Jan 12, 2022
1 parent 18ee0b7 commit e23114c
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 10 deletions.
19 changes: 13 additions & 6 deletions google/cloud/bigquery/job/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@


_CONTAINS_ORDER_BY = re.compile(r"ORDER\s+BY", re.IGNORECASE)
_EXCEPTION_FOOTER_TEMPLATE = "{message}\n\nLocation: {location}\nJob ID: {job_id}\n"
_TIMEOUT_BUFFER_SECS = 0.1


Expand Down Expand Up @@ -1196,17 +1197,17 @@ def _blocking_poll(self, timeout=None, **kwargs):
super(QueryJob, self)._blocking_poll(timeout=timeout, **kwargs)

@staticmethod
def _format_for_exception(query, job_id):
def _format_for_exception(message: str, query: str):
"""Format a query for the output in exception message.
Args:
message (str): The original exception message.
query (str): The SQL query to format.
job_id (str): The ID of the job that ran the query.
Returns:
str: A formatted query text.
"""
template = "\n\n(job ID: {job_id})\n\n{header}\n\n{ruler}\n{body}\n{ruler}"
template = "{message}\n\n{header}\n\n{ruler}\n{body}\n{ruler}"

lines = query.splitlines()
max_line_len = max(len(line) for line in lines)
Expand All @@ -1223,7 +1224,7 @@ def _format_for_exception(query, job_id):
"{:4}:{}".format(n, line) for n, line in enumerate(lines, start=1)
)

return template.format(job_id=job_id, header=header, ruler=ruler, body=body)
return template.format(message=message, header=header, ruler=ruler, body=body)

def _begin(self, client=None, retry=DEFAULT_RETRY, timeout=None):
"""API call: begin the job via a POST request
Expand All @@ -1248,7 +1249,10 @@ def _begin(self, client=None, retry=DEFAULT_RETRY, timeout=None):
try:
super(QueryJob, self)._begin(client=client, retry=retry, timeout=timeout)
except exceptions.GoogleAPICallError as exc:
exc.message += self._format_for_exception(self.query, self.job_id)
exc.message = _EXCEPTION_FOOTER_TEMPLATE.format(
message=exc.message, location=self.location, job_id=self.job_id
)
exc.debug_message = self._format_for_exception(exc.message, self.query)
exc.query_job = self
raise

Expand Down Expand Up @@ -1447,7 +1451,10 @@ def do_get_result():
do_get_result()

except exceptions.GoogleAPICallError as exc:
exc.message += self._format_for_exception(self.query, self.job_id)
exc.message = _EXCEPTION_FOOTER_TEMPLATE.format(
message=exc.message, location=self.location, job_id=self.job_id
)
exc.debug_message = self._format_for_exception(exc.message, self.query) # type: ignore
exc.query_job = self # type: ignore
raise
except requests.exceptions.Timeout as exc:
Expand Down
20 changes: 16 additions & 4 deletions tests/unit/job/test_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -1360,13 +1360,19 @@ def test_result_error(self):
exc_job_instance = getattr(exc_info.exception, "query_job", None)
self.assertIs(exc_job_instance, job)

# Query text could contain sensitive information, so it must not be
# included in logs / exception representation.
full_text = str(exc_info.exception)
assert job.job_id in full_text
assert "Query Job SQL Follows" in full_text
assert "Query Job SQL Follows" not in full_text

# It is useful to have query text available, so it is provided in a
# debug_message property.
debug_message = exc_info.exception.debug_message
assert "Query Job SQL Follows" in debug_message
for i, line in enumerate(query.splitlines(), start=1):
expected_line = "{}:{}".format(i, line)
assert expected_line in full_text
assert expected_line in debug_message

def test_result_transport_timeout_error(self):
query = textwrap.dedent(
Expand Down Expand Up @@ -1452,13 +1458,19 @@ def test__begin_error(self):
exc_job_instance = getattr(exc_info.exception, "query_job", None)
self.assertIs(exc_job_instance, job)

# Query text could contain sensitive information, so it must not be
# included in logs / exception representation.
full_text = str(exc_info.exception)
assert job.job_id in full_text
assert "Query Job SQL Follows" in full_text
assert "Query Job SQL Follows" not in full_text

# It is useful to have query text available, so it is provided in a
# debug_message property.
debug_message = exc_info.exception.debug_message
assert "Query Job SQL Follows" in debug_message
for i, line in enumerate(query.splitlines(), start=1):
expected_line = "{}:{}".format(i, line)
assert expected_line in full_text
assert expected_line in debug_message

def test__begin_w_timeout(self):
PATH = "/projects/%s/jobs" % (self.PROJECT,)
Expand Down

0 comments on commit e23114c

Please sign in to comment.