From 9813b49e17f0da59e69bc84c85a15adcefddfc5c Mon Sep 17 00:00:00 2001 From: Marcelo Henrique Neppel Date: Fri, 28 Nov 2025 16:41:34 -0300 Subject: [PATCH 1/2] Extract warning and error messages from pgBackRest logs Sometimes the error message is sent to the stdout instead of the stderr, so it's important to check both Signed-off-by: Marcelo Henrique Neppel --- src/backups.py | 62 +++++++++++++++++++++++++----- tests/unit/test_backups.py | 78 +++++++++++++++++++++++++++++++++++++- 2 files changed, 130 insertions(+), 10 deletions(-) diff --git a/src/backups.py b/src/backups.py index ee67a00ab8..c3b60732be 100644 --- a/src/backups.py +++ b/src/backups.py @@ -195,7 +195,8 @@ def can_use_s3_repository(self) -> tuple[bool, str]: else: if return_code != 0: - logger.error(f"Failed to run pgbackrest: {stderr}") + extracted_error = self._extract_error_message(stdout, stderr) + logger.error(f"Failed to run pgbackrest: {extracted_error}") return False, FAILED_TO_INITIALIZE_STANZA_ERROR_MESSAGE for stanza in json.loads(stdout): @@ -357,6 +358,41 @@ def _execute_command( ) return process.returncode, process.stdout.decode(), process.stderr.decode() + @staticmethod + def _extract_error_message(stdout: str, stderr: str) -> str: + """Extract key error message from pgBackRest output. + + Args: + stdout: Standard output from pgBackRest command. + stderr: Standard error from pgBackRest command. + + Returns: + Extracted error message, prioritizing ERROR/WARN lines from output. + """ + combined_output = f"{stdout}\n{stderr}".strip() + if not combined_output: + return f"Unknown error occurred. Please check the logs at {PGBACKREST_LOGS_PATH}" + + # Extract lines with ERROR or WARN markers from pgBackRest output + error_lines = [] + for line in combined_output.splitlines(): + if "ERROR:" in line or "WARN:" in line: + # Clean up the line by removing debug prefixes like "P00 ERROR:" + cleaned = re.sub(r"^.*?(ERROR:|WARN:)", r"\1", line).strip() + error_lines.append(cleaned) + + # If we found error/warning lines, return them joined + if error_lines: + return " ".join(error_lines) + + # Otherwise return the last non-empty line from stderr or stdout + if stderr.strip(): + return stderr.strip().splitlines()[-1] + if stdout.strip(): + return stdout.strip().splitlines()[-1] + + return f"Unknown error occurred. Please check the logs at {PGBACKREST_LOGS_PATH}" + def _format_backup_list(self, backup_list) -> str: """Formats provided list of backups as a table.""" s3_parameters, _ = self._retrieve_s3_parameters() @@ -405,7 +441,8 @@ def _generate_backup_list_output(self) -> str: "--output=json", ]) if return_code != 0: - raise ListBackupsError(f"Failed to list backups with error: {stderr}") + extracted_error = self._extract_error_message(output, stderr) + raise ListBackupsError(f"Failed to list backups with error: {extracted_error}") backups = json.loads(output)[0]["backup"] for backup in backups: @@ -476,7 +513,8 @@ def _list_backups(self, show_failed: bool, parse=True) -> dict[str, tuple[str, s "--output=json", ]) if return_code != 0: - raise ListBackupsError(f"Failed to list backups with error: {stderr}") + extracted_error = self._extract_error_message(output, stderr) + raise ListBackupsError(f"Failed to list backups with error: {extracted_error}") repository_info = next(iter(json.loads(output)), None) @@ -511,7 +549,8 @@ def _list_timelines(self) -> dict[str, tuple[str, str]]: "--output=json", ]) if return_code != 0: - raise ListBackupsError(f"Failed to list repository with error: {stderr}") + extracted_error = self._extract_error_message(output, stderr) + raise ListBackupsError(f"Failed to list repository with error: {extracted_error}") repository = json.loads(output).items() if repository is None: @@ -733,15 +772,16 @@ def _is_primary_pgbackrest_service_running(self) -> bool: if not self.charm.primary_endpoint: logger.warning("Failed to contact pgBackRest TLS server: no primary endpoint") return False - return_code, _, stderr = self._execute_command([ + return_code, stdout, stderr = self._execute_command([ PGBACKREST_EXECUTABLE, "server-ping", "--io-timeout=10", self.charm.primary_endpoint, ]) if return_code != 0: + extracted_error = self._extract_error_message(stdout, stderr) logger.warning( - f"Failed to contact pgBackRest TLS server on {self.charm.primary_endpoint} with error {stderr}" + f"Failed to contact pgBackRest TLS server on {self.charm.primary_endpoint} with error {extracted_error}" ) return return_code == 0 @@ -967,7 +1007,8 @@ def _run_backup( f"backup/{self.stanza_name}/{backup_id}/backup.log", s3_parameters, ) - error_message = f"Failed to backup PostgreSQL with error: {stderr}" + extracted_error = self._extract_error_message(stdout, stderr) + error_message = f"Failed to backup PostgreSQL with error: {extracted_error}" logger.error(f"Backup failed: {error_message}") event.fail(error_message) else: @@ -1122,7 +1163,7 @@ def _on_restore_action(self, event): # noqa: C901 # Remove previous cluster information to make it possible to initialise a new cluster. logger.info("Removing previous cluster information") - return_code, _, stderr = self._execute_command( + return_code, stdout, stderr = self._execute_command( [ "charmed-postgresql.patronictl", "-c", @@ -1134,7 +1175,10 @@ def _on_restore_action(self, event): # noqa: C901 timeout=10, ) if return_code != 0: - error_message = f"Failed to remove previous cluster information with error: {stderr}" + extracted_error = self._extract_error_message(stdout, stderr) + error_message = ( + f"Failed to remove previous cluster information with error: {extracted_error}" + ) logger.error(f"Restore failed: {error_message}") event.fail(error_message) return diff --git a/tests/unit/test_backups.py b/tests/unit/test_backups.py index 5a26476deb..0780b21260 100644 --- a/tests/unit/test_backups.py +++ b/tests/unit/test_backups.py @@ -13,7 +13,7 @@ from ops.testing import Harness from tenacity import RetryError, wait_fixed -from backups import ListBackupsError +from backups import ListBackupsError, PostgreSQLBackups from charm import PostgresqlOperatorCharm from constants import PEER @@ -37,6 +37,82 @@ def harness(): harness.cleanup() +def test_extract_error_message_with_error_in_stdout(): + """Test extracting error from stdout with ERROR marker.""" + stdout = """2025-11-07 07:21:11.120 P00 ERROR: [056]: unable to find primary cluster - cannot proceed + HINT: are all available clusters in recovery?""" + stderr = "" + result = PostgreSQLBackups._extract_error_message(stdout, stderr) + assert result == "ERROR: [056]: unable to find primary cluster - cannot proceed" + + +def test_extract_error_message_with_error_in_stderr(): + """Test extracting error from stderr when no ERROR marker.""" + stdout = "" + stderr = "Connection refused: cannot connect to S3" + result = PostgreSQLBackups._extract_error_message(stdout, stderr) + assert result == "Connection refused: cannot connect to S3" + + +def test_extract_error_message_with_both_stdout_and_stderr(): + """Test extracting error when ERROR marker is in stdout and stderr has additional info.""" + stdout = "P00 ERROR: database error occurred" + stderr = "Additional stderr info" + result = PostgreSQLBackups._extract_error_message(stdout, stderr) + assert result == "ERROR: database error occurred" + + +def test_extract_error_message_with_warning(): + """Test extracting warning message from output.""" + stdout = "P00 WARN: configuration issue detected" + stderr = "" + result = PostgreSQLBackups._extract_error_message(stdout, stderr) + assert result == "WARN: configuration issue detected" + + +def test_extract_error_message_with_multiple_errors(): + """Test extracting multiple ERROR/WARN lines.""" + stdout = """P00 ERROR: first error +P00 WARN: warning message +P00 ERROR: second error""" + stderr = "" + result = PostgreSQLBackups._extract_error_message(stdout, stderr) + assert result == "ERROR: first error WARN: warning message ERROR: second error" + + +def test_extract_error_message_with_empty_output(): + """Test with empty stdout and stderr returns helpful message.""" + result = PostgreSQLBackups._extract_error_message("", "") + assert ( + result + == "Unknown error occurred. Please check the logs at /var/snap/charmed-postgresql/common/var/log/pgbackrest" + ) + + +def test_extract_error_message_fallback_to_stdout_last_line(): + """Test fallback to last line of stdout when no ERROR/WARN markers.""" + stdout = "Some generic output\nMore details here\nFinal error message" + stderr = "" + result = PostgreSQLBackups._extract_error_message(stdout, stderr) + assert result == "Final error message" + + +def test_extract_error_message_fallback_to_stderr_last_line(): + """Test fallback to last line of stderr when no ERROR/WARN markers and no stdout.""" + stdout = "" + stderr = "Line 1\nLine 2\nFinal error message" + result = PostgreSQLBackups._extract_error_message(stdout, stderr) + assert result == "Final error message" + + +def test_extract_error_message_cleans_debug_prefix(): + """Test that debug prefixes like 'P00 ERROR:' are cleaned up.""" + stdout = "2025-11-07 07:21:11.120 P00 ERROR: test error message" + stderr = "" + result = PostgreSQLBackups._extract_error_message(stdout, stderr) + assert result == "ERROR: test error message" + + def test_stanza_name(harness): assert ( harness.charm.backup.stanza_name From 1592518c0013aa0f32fc20ccca72afe6b6035fe4 Mon Sep 17 00:00:00 2001 From: Marcelo Henrique Neppel Date: Wed, 10 Dec 2025 17:17:49 -0300 Subject: [PATCH 2/2] Use semicolon separator for pgBackRest error messages Concatenate multiple ERROR/WARN lines with "; " instead of " " for improved readability when debugging backup issues. Updated unit test to match new separator format. Cherry-picked from canonical/postgresql-operator#1328 (commit 1f1b601). Signed-off-by: Marcelo Henrique Neppel --- src/backups.py | 2 +- tests/unit/test_backups.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/backups.py b/src/backups.py index c3b60732be..d72fefd705 100644 --- a/src/backups.py +++ b/src/backups.py @@ -383,7 +383,7 @@ def _extract_error_message(stdout: str, stderr: str) -> str: # If we found error/warning lines, return them joined if error_lines: - return " ".join(error_lines) + return "; ".join(error_lines) # Otherwise return the last non-empty line from stderr or stdout if stderr.strip(): diff --git a/tests/unit/test_backups.py b/tests/unit/test_backups.py index 0780b21260..472fd9b29e 100644 --- a/tests/unit/test_backups.py +++ b/tests/unit/test_backups.py @@ -77,7 +77,7 @@ def test_extract_error_message_with_multiple_errors(): P00 ERROR: second error""" stderr = "" result = PostgreSQLBackups._extract_error_message(stdout, stderr) - assert result == "ERROR: first error WARN: warning message ERROR: second error" + assert result == "ERROR: first error; WARN: warning message; ERROR: second error" def test_extract_error_message_with_empty_output():