Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions .github/workflows/integration_test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,19 +19,24 @@ jobs:
uses: actions/checkout@v5
- name: Set up environment
run: |
sudo snap install charmcraft --classic
sudo snap install go --classic
go install github.com/snapcore/spread/cmd/spread@latest
pipx install tox poetry
- name: Collect spread jobs
id: collect-jobs
shell: python
run: |
import json
import pathlib
import os
import subprocess

spread_jobs = (
subprocess.run(
["charmcraft", "test", "--list", "github-ci"], capture_output=True, check=True, text=True
[pathlib.Path.home() / "go/bin/spread", "-list", "github-ci"],
capture_output=True,
check=True,
text=True,
)
.stdout.strip()
.split("\n")
Expand Down
18 changes: 16 additions & 2 deletions src/backups.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import logging
import os
import re
import signal
import tempfile
import time
from datetime import UTC, datetime
Expand All @@ -26,7 +27,7 @@
from ops.framework import Object
from ops.jujuversion import JujuVersion
from ops.model import ActiveStatus, MaintenanceStatus
from ops.pebble import ChangeError, ExecError
from ops.pebble import ChangeError, ExecError, ServiceStatus
from tenacity import RetryError, Retrying, stop_after_attempt, wait_fixed

from constants import (
Expand Down Expand Up @@ -820,6 +821,8 @@ def _on_create_backup_action(self, event) -> None:
"pgbackrest",
f"--stanza={self.stanza_name}",
"--log-level-console=debug",
"--log-level-file=debug",
"--log-subprocess",
f"--type={BACKUP_TYPE_OVERRIDES[backup_type]}",
"backup",
]
Expand Down Expand Up @@ -1303,7 +1306,18 @@ def start_stop_pgbackrest_service(self) -> bool:
return False

# Start the service.
self.container.restart(self.charm.pgbackrest_server_service)
services = self.container.pebble.get_services(names=[self.charm.pgbackrest_server_service])
if len(services) == 0:
return False

if services[0].current == ServiceStatus.ACTIVE:
logger.debug("Sending SIGHUP to pgBackRest TLS server to reload configuration")
self.container.pebble.send_signal(
signal.SIGHUP, services=[self.charm.pgbackrest_server_service]
)
else:
logger.debug("Starting pgBackRest TLS server service")
self.container.restart(self.charm.pgbackrest_server_service)
return True

def _upload_content_to_s3(
Expand Down
79 changes: 39 additions & 40 deletions src/charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
AuthorisationRulesChangeCharmEvents,
AuthorisationRulesObserver,
)
from refresh import PostgreSQLRefresh

# First platform-specific import, will fail on wrong architecture
try:
Expand Down Expand Up @@ -144,7 +145,6 @@
)
from ldap import PostgreSQLLDAP
from patroni import NotReadyError, Patroni, SwitchoverFailedError, SwitchoverNotSyncError
from refresh import PostgreSQLRefresh
from relations.async_replication import (
REPLICATION_CONSUMER_RELATION,
REPLICATION_OFFER_RELATION,
Expand Down Expand Up @@ -212,16 +212,6 @@ def __init__(self, *args):
if isinstance(handler, JujuLogHandler):
handler.setFormatter(logging.Formatter("{name}:{message}", style="{"))

# Support for disabling the operator.
disable_file = Path(f"{os.environ.get('CHARM_DIR')}/disable")
if disable_file.exists():
logger.warning(
f"\n\tDisable file `{disable_file.resolve()}` found, the charm will skip all events."
"\n\tTo resume normal operations, please remove the file."
)
self.set_unit_status(BlockedStatus("Disabled"))
sys.exit(0)

self.peer_relation_app = DataPeerData(
self.model,
relation_name=PEER,
Expand Down Expand Up @@ -280,27 +270,12 @@ def __init__(self, *args):
self.restart_manager = RollingOpsManager(
charm=self, relation="restart", callback=self._restart
)
self._observer.start_authorisation_rules_observer()
self.grafana_dashboards = GrafanaDashboardProvider(self)
self.metrics_endpoint = MetricsEndpointProvider(
self,
refresh_event=[self.on.start],
jobs=self._generate_metrics_jobs(self.is_tls_enabled),
)
self.loki_push = LogProxyConsumer(
self,
logs_scheme={"postgresql": {"log-files": POSTGRES_LOG_FILES}},
relation_name="logging",
)

if self.model.juju_version.supports_open_port_on_k8s:
try:
self.unit.set_ports(5432, 8008)
except ModelError:
logger.exception("failed to open port")
self.tracing = TracingEndpointRequirer(
self, relation_name=TRACING_RELATION_NAME, protocols=[TRACING_PROTOCOL]
)

try:
self.refresh = charm_refresh.Kubernetes(
Expand All @@ -311,17 +286,24 @@ def __init__(self, *args):
_charm=self,
)
)
except charm_refresh.KubernetesJujuAppNotTrusted:
sys.exit()
except charm_refresh.PeerRelationNotReady:
if self.unit.is_leader():
self.app.status = MaintenanceStatus("Waiting for peer relation")
sys.exit()
except charm_refresh.UnitTearingDown:
self.set_unit_status(MaintenanceStatus("Tearing down"))
sys.exit()
except (
charm_refresh.KubernetesJujuAppNotTrusted,
charm_refresh.PeerRelationNotReady,
charm_refresh.UnitTearingDown,
):
self.refresh = None
self._reconcile_refresh_status()

# Support for disabling the operator.
disable_file = Path(f"{os.environ.get('CHARM_DIR')}/disable")
if disable_file.exists():
logger.warning(
f"\n\tDisable file `{disable_file.resolve()}` found, the charm will skip all events."
"\n\tTo resume normal operations, please remove the file."
)
self.set_unit_status(BlockedStatus("Disabled"))
sys.exit(0)

if (
self.refresh is not None
and self.refresh.workload_allowed_to_start
Expand All @@ -332,6 +314,22 @@ def __init__(self, *args):
else:
self.refresh.next_unit_allowed_to_refresh = True

self._observer.start_authorisation_rules_observer()
self.grafana_dashboards = GrafanaDashboardProvider(self)
self.metrics_endpoint = MetricsEndpointProvider(
self,
refresh_event=[self.on.start],
jobs=self._generate_metrics_jobs(self.is_tls_enabled),
)
self.loki_push = LogProxyConsumer(
self,
logs_scheme={"postgresql": {"log-files": POSTGRES_LOG_FILES}},
relation_name="logging",
)
self.tracing = TracingEndpointRequirer(
self, relation_name=TRACING_RELATION_NAME, protocols=[TRACING_PROTOCOL]
)

def reconcile(self):
"""Reconcile the unit state on refresh."""
self.set_unit_status(MaintenanceStatus("starting services"))
Expand Down Expand Up @@ -366,8 +364,9 @@ def reconcile(self):
BlockedStatus("upgrade failed. Check logs for rollback instruction")
)
else:
self.refresh.next_unit_allowed_to_refresh = True
self.set_unit_status(ActiveStatus())
if self.refresh is not None:
self.refresh.next_unit_allowed_to_refresh = True
self.set_unit_status(ActiveStatus())

def _reconcile_refresh_status(self, _=None):
if self.unit.is_leader():
Expand Down Expand Up @@ -1094,7 +1093,7 @@ def _on_leader_elected(self, event: LeaderElectedEvent) -> None:
return

# Create resources and add labels needed for replication.
if not self.refresh.in_progress:
if self.refresh is not None and not self.refresh.in_progress:
try:
self._create_services()
except ApiError:
Expand Down Expand Up @@ -1296,7 +1295,7 @@ def _initialize_cluster(self, event: HookEvent) -> bool:
return False

# Create resources and add labels needed for replication
if not self.refresh.in_progress:
if self.refresh is not None and not self.refresh.in_progress:
try:
self._create_services()
except ApiError:
Expand Down Expand Up @@ -1608,7 +1607,7 @@ def _fix_pod(self) -> None:
# Recreate k8s resources and add labels required for replication
# when the pod loses them (like when it's deleted).
self.push_tls_files_to_workload()
if not self.refresh.in_progress:
if self.refresh is not None and not self.refresh.in_progress:
try:
self._create_services()
except ApiError:
Expand Down
44 changes: 41 additions & 3 deletions tests/unit/test_backups.py
Original file line number Diff line number Diff line change
Expand Up @@ -1894,6 +1894,12 @@ def test_retrieve_s3_parameters(


def test_start_stop_pgbackrest_service(harness):
# Enable Pebble connectivity
harness.set_can_connect("postgresql", True)

# Get the container to set up pebble mocking
container = harness.model.unit.get_container("postgresql")

with (
patch(
"charm.PostgreSQLBackups._is_primary_pgbackrest_service_running",
Expand All @@ -1915,6 +1921,7 @@ def test_start_stop_pgbackrest_service(harness):
"charm.PostgreSQLBackups._render_pgbackrest_conf_file"
) as _render_pgbackrest_conf_file,
patch("charm.PostgreSQLBackups._are_backup_settings_ok") as _are_backup_settings_ok,
patch.object(container.pebble, "send_signal") as _send_signal,
):
# Test when S3 parameters are not ok (no operation, but returns success).
_are_backup_settings_ok.return_value = (False, "fake error message")
Expand Down Expand Up @@ -1961,18 +1968,49 @@ def test_start_stop_pgbackrest_service(harness):
_stop.assert_not_called()
_restart.assert_not_called()

# Test when the service has already started in the primary.
# Test when the service has already started in the primary and is ACTIVE.
# This should send SIGHUP signal to reload configuration.
_is_primary_pgbackrest_service_running.return_value = True

# Add pgbackrest service to Pebble plan using the container's pebble client
container.pebble.add_layer(
"pgbackrest",
{
"services": {
harness.charm.pgbackrest_server_service: {
"override": "replace",
"summary": "pgbackrest server",
"command": "/bin/true",
"startup": "enabled",
}
}
},
)

# Start the service to make it ACTIVE
container.start(harness.charm.pgbackrest_server_service)

assert harness.charm.backup.start_stop_pgbackrest_service() is True
_stop.assert_not_called()
_restart.assert_called_once()
_restart.assert_not_called()
_send_signal.assert_called_once()

# Test when this unit is the primary.
# Test when this unit is the primary and service is NOT ACTIVE (INACTIVE).
# This should restart the service.
_restart.reset_mock()
_send_signal.reset_mock()
_is_primary.return_value = True
_is_primary_pgbackrest_service_running.return_value = False

# Stop the service using the Testing Pebble Client's stop_services method
container.pebble.stop_services([harness.charm.pgbackrest_server_service])

# Reset the stop mock (it was called by the testing framework when stopping)
_stop.reset_mock()

assert harness.charm.backup.start_stop_pgbackrest_service() is True
_stop.assert_not_called()
_send_signal.assert_not_called()
_restart.assert_called_once()


Expand Down
Loading