From a03959c31c4b878dc01352ab7fc1ca491646df8e Mon Sep 17 00:00:00 2001
From: Dragomir Penev
Date: Sun, 14 Sep 2025 16:41:08 +0300
Subject: [PATCH 1/2] Handle empty region

---
 .github/workflows/integration_test.yaml     |   4 +-
 src/backups.py                              |  46 +++--
 templates/pgbackrest.conf.j2                |   4 +
 tests/integration/conftest.py               |   2 +-
 tests/integration/test_backups_ceph.py      | 206 ++++++++++++++++++++
 tests/spread/test_backups_ceph.py/task.yaml |   7 +
 tests/unit/test_backups.py                  |   7 +-
 7 files changed, 249 insertions(+), 27 deletions(-)
 create mode 100644 tests/integration/test_backups_ceph.py
 create mode 100644 tests/spread/test_backups_ceph.py/task.yaml

diff --git a/.github/workflows/integration_test.yaml b/.github/workflows/integration_test.yaml
index e801c6ca29..5bbec160ed 100644
--- a/.github/workflows/integration_test.yaml
+++ b/.github/workflows/integration_test.yaml
@@ -84,10 +84,10 @@ jobs:
     needs:
       - collect-integration-tests
     runs-on: ${{ matrix.job.runner }}
-    timeout-minutes: 217 # Sum of steps `timeout-minutes` + 5
+    timeout-minutes: 226 # Sum of steps `timeout-minutes` + 5
     steps:
       - name: Free up disk space
-        timeout-minutes: 1
+        timeout-minutes: 10
        run: |
          printf '\nDisk usage before cleanup\n'
          df --human-readable
diff --git a/src/backups.py b/src/backups.py
index 5619361517..cd310b5050 100644
--- a/src/backups.py
+++ b/src/backups.py
@@ -12,9 +12,11 @@
 from datetime import datetime, timezone
 from io import BytesIO

-import boto3
-import botocore
+from boto3.session import Session
+from botocore.client import Config
 from botocore.exceptions import ClientError
+from botocore.loaders import create_loader
+from botocore.regions import EndpointResolver
 from charms.data_platform_libs.v0.s3 import CredentialsChangedEvent, S3Requirer
 from jinja2 import Template
 from lightkube import ApiError, Client
@@ -89,16 +91,18 @@ def _tls_ca_chain_filename(self) -> str:
         return ""

     def _get_s3_session_resource(self, s3_parameters: dict):
-        session = boto3.session.Session(
-            aws_access_key_id=s3_parameters["access-key"],
-            aws_secret_access_key=s3_parameters["secret-key"],
-            region_name=s3_parameters["region"],
-        )
+        kwargs = {
+            "aws_access_key_id": s3_parameters["access-key"],
+            "aws_secret_access_key": s3_parameters["secret-key"],
+        }
+        if "region" in s3_parameters:
+            kwargs["region_name"] = s3_parameters["region"]
+        session = Session(**kwargs)
         return session.resource(
             "s3",
             endpoint_url=self._construct_endpoint(s3_parameters),
             verify=(self._tls_ca_chain_filename or None),
-            config=botocore.client.Config(
+            config=Config(
                 # https://github.com/boto/boto3/issues/4400#issuecomment-2600742103
                 request_checksum_calculation="when_required",
                 response_checksum_validation="when_required",
@@ -187,9 +191,12 @@ def can_use_s3_repository(self) -> tuple[bool, str | None]:
             return False, FAILED_TO_INITIALIZE_STANZA_ERROR_MESSAGE

         for stanza in json.loads(output):
-            if stanza.get("name") != self.stanza_name:
+            if (stanza_name := stanza.get("name")) and stanza_name == "[invalid]":
+                logger.error("Invalid stanza name from s3")
+                return False, FAILED_TO_INITIALIZE_STANZA_ERROR_MESSAGE
+            if stanza_name != self.stanza_name:
                 logger.debug(
-                    f"can_use_s3_repository: incompatible stanza name s3={stanza.get('name', '')}, local={self.stanza_name}"
+                    f"can_use_s3_repository: incompatible stanza name s3={stanza_name or ''}, local={self.stanza_name}"
                 )
                 return False, ANOTHER_CLUSTER_REPOSITORY_ERROR_MESSAGE

@@ -224,12 +231,12 @@ def _construct_endpoint(self, s3_parameters: dict) -> str:
         endpoint = s3_parameters["endpoint"]

         # Load endpoints data.
-        loader = botocore.loaders.create_loader()
+        loader = create_loader()
         data = loader.load_data("endpoints")

         # Construct the endpoint using the region.
-        resolver = botocore.regions.EndpointResolver(data)
-        endpoint_data = resolver.construct_endpoint("s3", s3_parameters["region"])
+        resolver = EndpointResolver(data)
+        endpoint_data = resolver.construct_endpoint("s3", s3_parameters.get("region"))

         # Use the built endpoint if it is an AWS endpoint.
         if endpoint_data and endpoint.endswith(endpoint_data["dnsSuffix"]):
@@ -243,7 +250,7 @@ def _create_bucket_if_not_exists(self) -> None:
             return

         bucket_name = s3_parameters["bucket"]
-        region = s3_parameters.get("region")
+        region = s3_parameters.get("region", "")

         try:
             s3 = self._get_s3_session_resource(s3_parameters)
@@ -567,8 +574,8 @@ def _initialise_stanza(self, event: HookEvent) -> bool:
                 f"--stanza={self.stanza_name}",
                 "stanza-create",
             ])
-        except ExecError as e:
-            logger.exception(e)
+        except ExecError:
+            logger.exception("Failed to initialise stanza:")
             self._s3_initialization_set_failure(FAILED_TO_INITIALIZE_STANZA_ERROR_MESSAGE)
             return False

@@ -607,8 +614,8 @@ def check_stanza(self) -> bool:
                 with attempt:
                     self._execute_command(["pgbackrest", f"--stanza={self.stanza_name}", "check"])
             self.charm._set_active_status()
-        except Exception as e:
-            logger.exception(e)
+        except Exception:
+            logger.exception("Failed to check stanza:")
             self._s3_initialization_set_failure(FAILED_TO_INITIALIZE_STANZA_ERROR_MESSAGE)
             return False

@@ -1252,7 +1259,6 @@ def _retrieve_s3_parameters(self) -> tuple[dict, list[str]]:

         # Add some sensible defaults (as expected by the code) for missing optional parameters
         s3_parameters.setdefault("endpoint", "https://s3.amazonaws.com")
-        s3_parameters.setdefault("region")
         s3_parameters.setdefault("path", "")
         s3_parameters.setdefault("s3-uri-style", "host")
         s3_parameters.setdefault("delete-older-than-days", "9999999")
@@ -1365,7 +1371,7 @@ def _read_content_from_s3(self, s3_path: str, s3_parameters: dict) -> str | None
             with BytesIO() as buf:
                 bucket.download_fileobj(processed_s3_path, buf)
                 return buf.getvalue().decode("utf-8")
-        except botocore.exceptions.ClientError as e:
+        except ClientError as e:
             if e.response["Error"]["Code"] == "404":
                 logger.info(
                     f"No such object to read from S3 bucket={bucket_name}, path={processed_s3_path}"
diff --git a/templates/pgbackrest.conf.j2 b/templates/pgbackrest.conf.j2
index 1606ca52bb..3bdcfdb393 100644
--- a/templates/pgbackrest.conf.j2
+++ b/templates/pgbackrest.conf.j2
@@ -6,7 +6,11 @@ repo1-retention-full={{ retention_full }}
 repo1-retention-history=365
 repo1-type=s3
 repo1-path={{ path }}
+{%- if region %}
 repo1-s3-region={{ region }}
+{% else %}
+repo1-s3-region=""
+{%- endif %}
 repo1-s3-endpoint={{ endpoint }}
 repo1-s3-bucket={{ bucket }}
 repo1-s3-uri-style={{ s3_uri_style }}
diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py
index 53323f3bee..ab31748337 100644
--- a/tests/integration/conftest.py
+++ b/tests/integration/conftest.py
@@ -42,7 +42,7 @@ def get_cloud_config(cloud: str) -> tuple[dict[str, str], dict[str, str]]:
             "endpoint": "https://storage.googleapis.com",
             "bucket": "data-charms-testing",
             "path": f"/postgresql-k8s/{uuid.uuid1()}",
-            "region": "",
+            "region": "us-east-1",
         }, {
             "access-key": os.environ["GCP_ACCESS_KEY"],
             "secret-key": os.environ["GCP_SECRET_KEY"],
diff --git a/tests/integration/test_backups_ceph.py b/tests/integration/test_backups_ceph.py
new file mode 100644
index 0000000000..3874d28c07
--- /dev/null
+++ b/tests/integration/test_backups_ceph.py
@@ -0,0 +1,206 @@
+#!/usr/bin/env python3
+# Copyright 2024 Canonical Ltd.
+# See LICENSE file for licensing details.
+import dataclasses
+import json
+import logging
+import os
+import socket
+import subprocess
+import time
+
+import boto3
+import botocore.exceptions
+import pytest
+from pytest_operator.plugin import OpsTest
+
+from .helpers import (
+    backup_operations,
+)
+from .juju_ import juju_major_version
+
+logger = logging.getLogger(__name__)
+
+S3_INTEGRATOR_APP_NAME = "s3-integrator"
+if juju_major_version < 3:
+    tls_certificates_app_name = "tls-certificates-operator"
+    tls_channel = "legacy/stable"
+    tls_config = {"generate-self-signed-certificates": "true", "ca-common-name": "Test CA"}
+else:
+    tls_certificates_app_name = "self-signed-certificates"
+    tls_channel = "latest/stable"
+    tls_config = {"ca-common-name": "Test CA"}
+
+backup_id, value_before_backup, value_after_backup = "", None, None
+
+
+@dataclasses.dataclass(frozen=True)
+class ConnectionInformation:
+    access_key_id: str
+    secret_access_key: str
+    bucket: str
+
+
+@pytest.fixture(scope="session")
+def microceph():
+    if not os.environ.get("CI") == "true":
+        raise Exception("Not running on CI. Skipping microceph installation")
+    logger.info("Setting up TLS certificates")
+    subprocess.run(["openssl", "genrsa", "-out", "./ca.key", "2048"], check=True)
+    subprocess.run(
+        [
+            "openssl",
+            "req",
+            "-x509",
+            "-new",
+            "-nodes",
+            "-key",
+            "./ca.key",
+            "-days",
+            "1024",
+            "-out",
+            "./ca.crt",
+            "-outform",
+            "PEM",
+            "-subj",
+            "/C=US/ST=Denial/L=Springfield/O=Dis/CN=www.example.com",
+        ],
+        check=True,
+    )
+    subprocess.run(["openssl", "genrsa", "-out", "./server.key", "2048"], check=True)
+    subprocess.run(
+        [
+            "openssl",
+            "req",
+            "-new",
+            "-key",
+            "./server.key",
+            "-out",
+            "./server.csr",
+            "-subj",
+            "/C=US/ST=Denial/L=Springfield/O=Dis/CN=www.example.com",
+        ],
+        check=True,
+    )
+    host_ip = socket.gethostbyname(socket.gethostname())
+    subprocess.run(
+        f'echo "subjectAltName = IP:{host_ip}" > ./extfile.cnf',
+        shell=True,
+        check=True,
+    )
+    subprocess.run(
+        [
+            "openssl",
+            "x509",
+            "-req",
+            "-in",
+            "./server.csr",
+            "-CA",
+            "./ca.crt",
+            "-CAkey",
+            "./ca.key",
+            "-CAcreateserial",
+            "-out",
+            "./server.crt",
+            "-days",
+            "365",
+            "-extfile",
+            "./extfile.cnf",
+        ],
+        check=True,
+    )
+
+    logger.info("Setting up microceph")
+    subprocess.run(
+        ["sudo", "snap", "install", "microceph", "--channel", "squid/stable"], check=True
+    )
+    subprocess.run(["sudo", "microceph", "cluster", "bootstrap"], check=True)
+    subprocess.run(["sudo", "microceph", "disk", "add", "loop,1G,3"], check=True)
+    subprocess.run(
+        'sudo microceph enable rgw --ssl-certificate="$(sudo base64 -w0 ./server.crt)" --ssl-private-key="$(sudo base64 -w0 ./server.key)"',
+        shell=True,
+        check=True,
+    )
+    output = subprocess.run(
+        [
+            "sudo",
+            "microceph.radosgw-admin",
+            "user",
+            "create",
+            "--uid",
+            "test",
+            "--display-name",
+            "test",
+        ],
+        capture_output=True,
+        check=True,
+        encoding="utf-8",
+    ).stdout
+    key = json.loads(output)["keys"][0]
+    key_id = key["access_key"]
+    secret_key = key["secret_key"]
+    logger.info("Creating microceph bucket")
+    for attempt in range(3):
+        try:
+            boto3.client(
+                "s3",
+                endpoint_url=f"https://{host_ip}",
+                aws_access_key_id=key_id,
+                aws_secret_access_key=secret_key,
+                verify="./ca.crt",
+            ).create_bucket(Bucket=_BUCKET)
+        except botocore.exceptions.EndpointConnectionError:
+            if attempt == 2:
+                raise
+            # microceph is not ready yet
+            logger.info("Unable to connect to microceph via S3. Retrying")
+            time.sleep(1)
+        else:
+            break
+    logger.info("Set up microceph")
+    return ConnectionInformation(key_id, secret_key, _BUCKET)
+
+
+_BUCKET = "testbucket"
+logger = logging.getLogger(__name__)
+
+
+@pytest.fixture(scope="session")
+def cloud_credentials(microceph: ConnectionInformation) -> dict[str, str]:
+    """Read cloud credentials."""
+    return {
+        "access-key": microceph.access_key_id,
+        "secret-key": microceph.secret_access_key,
+    }
+
+
+@pytest.fixture(scope="session")
+def cloud_configs(microceph: ConnectionInformation):
+    host_ip = socket.gethostbyname(socket.gethostname())
+    result = subprocess.run(
+        "sudo base64 -w0 ./ca.crt", shell=True, check=True, stdout=subprocess.PIPE, text=True
+    )
+    base64_output = result.stdout
+    return {
+        "endpoint": f"https://{host_ip}",
+        "bucket": microceph.bucket,
+        "path": "/pg",
+        "region": "",
+        "s3-uri-style": "path",
+        "tls-ca-chain": f"{base64_output}",
+    }
+
+
+async def test_backup_ceph(ops_test: OpsTest, cloud_configs, cloud_credentials, charm) -> None:
+    """Build and deploy two units of PostgreSQL in microceph, test backup and restore actions."""
+    await backup_operations(
+        ops_test,
+        charm,
+        S3_INTEGRATOR_APP_NAME,
+        tls_certificates_app_name,
+        tls_config,
+        tls_channel,
+        cloud_credentials,
+        "ceph",
+        cloud_configs,
+    )
diff --git a/tests/spread/test_backups_ceph.py/task.yaml b/tests/spread/test_backups_ceph.py/task.yaml
new file mode 100644
index 0000000000..22336bfea3
--- /dev/null
+++ b/tests/spread/test_backups_ceph.py/task.yaml
@@ -0,0 +1,7 @@
+summary: test_backups_ceph.py
+environment:
+  TEST_MODULE: test_backups_ceph.py
+execute: |
+  tox run -e integration -- "tests/integration/$TEST_MODULE" --model testing --alluredir="$SPREAD_TASK/allure-results"
+artifacts:
+  - allure-results
diff --git a/tests/unit/test_backups.py b/tests/unit/test_backups.py
index 2305916d3a..5dcf60ac1f 100644
--- a/tests/unit/test_backups.py
+++ b/tests/unit/test_backups.py
@@ -331,7 +331,7 @@ def test_create_bucket_if_not_exists(harness, tls_ca_chain_filename):
             new_callable=PropertyMock(return_value=tls_ca_chain_filename),
         ) as _tls_ca_chain_filename,
         patch("charm.PostgreSQLBackups._retrieve_s3_parameters") as _retrieve_s3_parameters,
-        patch("backups.botocore.client.Config") as _config,
+        patch("backups.Config") as _config,
     ):
         # Test when there are missing S3 parameters.
        _retrieve_s3_parameters.return_value = ([], ["bucket", "access-key", "secret-key"])
@@ -1891,7 +1891,6 @@ def test_retrieve_s3_parameters(
             "delete-older-than-days": "9999999",
             "endpoint": "https://s3.amazonaws.com",
             "path": "/",
-            "region": None,
             "s3-uri-style": "host",
             "secret-key": "test-secret-key",
         },
@@ -2014,8 +2013,8 @@ def test_upload_content_to_s3(harness, tls_ca_chain_filename):
     with (
         patch("tempfile.NamedTemporaryFile") as _named_temporary_file,
         patch("charm.PostgreSQLBackups._construct_endpoint") as _construct_endpoint,
-        patch("boto3.session.Session.resource") as _resource,
-        patch("backups.botocore.client.Config") as _config,
+        patch("backups.Session.resource") as _resource,
+        patch("backups.Config") as _config,
         patch(
             "charm.PostgreSQLBackups._tls_ca_chain_filename",
             new_callable=PropertyMock(return_value=tls_ca_chain_filename),

From ec73e2f5ade2c61d9adbf9426312798659bc27e0 Mon Sep 17 00:00:00 2001
From: Dragomir Penev
Date: Sun, 14 Sep 2025 17:56:41 +0300
Subject: [PATCH 2/2] Add empty region block test

---
 tests/integration/test_backups_gcp.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/tests/integration/test_backups_gcp.py b/tests/integration/test_backups_gcp.py
index af581fba63..28890a3c5b 100644
--- a/tests/integration/test_backups_gcp.py
+++ b/tests/integration/test_backups_gcp.py
@@ -248,3 +248,18 @@ async def test_delete_pod(ops_test: OpsTest, gcp_cloud_configs: tuple[dict, dict
         ops_test, "/etc/pgbackrest.conf", f"{database_app_name}/0"
     )
     assert original_pgbackrest_config == new_pgbackrest_config, "Pgbackrest config not rerendered"
+
+
+async def test_block_on_missing_region(
+    ops_test: OpsTest, gcp_cloud_configs: tuple[dict, dict]
+) -> None:
+    await ops_test.model.applications[S3_INTEGRATOR_APP_NAME].set_config({
+        **gcp_cloud_configs[0],
+        "region": "",
+    })
+    database_app_name = f"new-{DATABASE_APP_NAME}"
+    logger.info("waiting for the database charm to become blocked")
+    unit = ops_test.model.units.get(f"{database_app_name}/0")
+    await ops_test.model.block_until(
+        lambda: unit.workload_status_message == FAILED_TO_INITIALIZE_STANZA_ERROR_MESSAGE
+    )