Skip to content

Commit

Permalink
systemd: Support Graceful Reboot for AIO Node
Browse files Browse the repository at this point in the history
Ceph AIO installations, whether single-node or multi-node, do not cope
well with loopback mounts; in particular, a graceful system reboot
always ends in a deadlock.

`rbdmap.service` is already friendly to graceful system reboot, as shown
below:

    [Unit]
    After=network-online.target
    Before=remote-fs-pre.target
    Wants=network-online.target remote-fs-pre.target

    [Service]
    ExecStart=/usr/bin/rbdmap map
    ExecReload=/usr/bin/rbdmap map
    ExecStop=/usr/bin/rbdmap unmap-all

This PR introduces:

  - `ceph-mon.target`: Ensure startup after `network-online.target` and
    before `remote-fs-pre.target`
  - `ceph-*.target`: Ensure startup after `ceph-mon.target` and before
    `remote-fs-pre.target`
  - `rbdmap.service`: Once all `_netdev` mounts have been unmounted by
    `remote-fs.target`, ensure all RBD devices are unmapped BEFORE any
    Ceph components under `ceph.target` are stopped during shutdown

The logic has been proven in concept by
<https://github.com/alvistack/ansible-role-ceph_common/tree/develop>;
it also works as expected with the Ceph + Kubernetes deployment in
<https://github.com/alvistack/ansible-collection-kubernetes/tree/develop>.
No more deadlocks occur during graceful system reboot, for both
single-node and multi-node AIO setups with loopback mounts.

Also see:

  - <#36776>
  - <etcd-io/etcd#12259>
  - <cri-o/cri-o#4128>
  - <kubernetes/release#1504>

Fixes: https://tracker.ceph.com/issues/47528
Signed-off-by: Wong Hoi Sing Edison <hswong3i@gmail.com>
  • Loading branch information
hswong3i committed Sep 18, 2020
1 parent ff16f5d commit d88c834
Show file tree
Hide file tree
Showing 15 changed files with 35 additions and 16 deletions.
1 change: 1 addition & 0 deletions systemd/ceph-fuse.target
Expand Up @@ -2,5 +2,6 @@
Description=ceph target allowing to start/stop all ceph-fuse@.service instances at once
PartOf=ceph.target
Before=ceph.target

[Install]
WantedBy=remote-fs.target ceph.target
1 change: 1 addition & 0 deletions systemd/ceph-immutable-object-cache.target
Expand Up @@ -2,5 +2,6 @@
Description=ceph target allowing to start/stop all ceph-immutable-object-cache@.service instances at once
PartOf=ceph.target
Before=ceph.target

[Install]
WantedBy=multi-user.target ceph.target
3 changes: 3 additions & 0 deletions systemd/ceph-mds.target
@@ -1,6 +1,9 @@
# Grouping target for the ceph-mds@.service instances (see Description=).
[Unit]
Description=ceph target allowing to start/stop all ceph-mds@.service instances at once
# Declared as part of ceph.target; per systemd.unit(5), PartOf= propagates
# stop/restart of ceph.target to this unit.
PartOf=ceph.target
# Ordering only: this target is placed after ceph-mon.target and before
# ceph.target.
After=ceph-mon.target
Before=ceph.target
# Weak dependencies: activating this target also requests ceph.target and
# ceph-mon.target.
Wants=ceph.target ceph-mon.target

[Install]
# When enabled, this target is wanted by both multi-user.target and ceph.target.
WantedBy=multi-user.target ceph.target
5 changes: 3 additions & 2 deletions systemd/ceph-mds@.service.in
@@ -1,8 +1,9 @@
[Unit]
Description=Ceph metadata server daemon
After=network-online.target local-fs.target time-sync.target
Wants=network-online.target local-fs.target time-sync.target
PartOf=ceph-mds.target
After=network-online.target local-fs.target time-sync.target
Before=remote-fs-pre.target ceph-mds.target
Wants=network-online.target local-fs.target time-sync.target remote-fs-pre.target ceph-mds.target

[Service]
Environment=CLUSTER=ceph
Expand Down
3 changes: 3 additions & 0 deletions systemd/ceph-mgr.target
@@ -1,6 +1,9 @@
# Grouping target for the ceph-mgr@.service instances (see Description=).
[Unit]
Description=ceph target allowing to start/stop all ceph-mgr@.service instances at once
# Declared as part of ceph.target; per systemd.unit(5), PartOf= propagates
# stop/restart of ceph.target to this unit.
PartOf=ceph.target
# Ordering only: this target is placed after ceph-mon.target and before
# ceph.target.
After=ceph-mon.target
Before=ceph.target
# Weak dependencies: activating this target also requests ceph.target and
# ceph-mon.target.
Wants=ceph.target ceph-mon.target

[Install]
# When enabled, this target is wanted by both multi-user.target and ceph.target.
WantedBy=multi-user.target ceph.target
7 changes: 3 additions & 4 deletions systemd/ceph-mgr@.service.in
@@ -1,8 +1,9 @@
[Unit]
Description=Ceph cluster manager daemon
After=network-online.target local-fs.target time-sync.target
Wants=network-online.target local-fs.target time-sync.target
PartOf=ceph-mgr.target
After=network-online.target local-fs.target time-sync.target
Before=remote-fs-pre.target ceph-mgr.target
Wants=network-online.target local-fs.target time-sync.target remote-fs-pre.target ceph-mgr.target

[Service]
Environment=CLUSTER=ceph
Expand All @@ -28,11 +29,9 @@ RestartSec=10
RestrictSUIDSGID=true
StartLimitBurst=3
StartLimitInterval=30min

# We need to disable this protection as some python libraries generate
# dynamic code, like python-cffi, and require mmap calls to succeed
MemoryDenyWriteExecute=false


[Install]
WantedBy=ceph-mgr.target
2 changes: 2 additions & 0 deletions systemd/ceph-mon.target
Expand Up @@ -2,5 +2,7 @@
Description=ceph target allowing to start/stop all ceph-mon@.service instances at once
PartOf=ceph.target
Before=ceph.target
Wants=ceph.target

[Install]
WantedBy=multi-user.target ceph.target
6 changes: 3 additions & 3 deletions systemd/ceph-mon@.service.in
@@ -1,13 +1,13 @@
[Unit]
Description=Ceph cluster monitor daemon

PartOf=ceph-mon.target
# According to:
# http://www.freedesktop.org/wiki/Software/systemd/NetworkTarget
# these can be removed once ceph-mon will dynamically change network
# configuration.
After=network-online.target local-fs.target time-sync.target
Wants=network-online.target local-fs.target time-sync.target
PartOf=ceph-mon.target
Before=remote-fs-pre.target ceph-mon.target
Wants=network-online.target local-fs.target time-sync.target remote-fs-pre.target ceph-mon.target

[Service]
Environment=CLUSTER=ceph
Expand Down
3 changes: 3 additions & 0 deletions systemd/ceph-osd.target
@@ -1,6 +1,9 @@
# Grouping target for the ceph-osd@.service instances (see Description=).
[Unit]
Description=ceph target allowing to start/stop all ceph-osd@.service instances at once
# Declared as part of ceph.target; per systemd.unit(5), PartOf= propagates
# stop/restart of ceph.target to this unit.
PartOf=ceph.target
# Ordering only: this target is placed after ceph-mon.target and before
# ceph.target.
After=ceph-mon.target
Before=ceph.target
# Weak dependencies: activating this target also requests ceph.target and
# ceph-mon.target.
Wants=ceph.target ceph-mon.target

[Install]
# When enabled, this target is wanted by both multi-user.target and ceph.target.
WantedBy=multi-user.target ceph.target
5 changes: 3 additions & 2 deletions systemd/ceph-osd@.service.in
@@ -1,8 +1,9 @@
[Unit]
Description=Ceph object storage daemon osd.%i
After=network-online.target local-fs.target time-sync.target ceph-mon.target
Wants=network-online.target local-fs.target time-sync.target
PartOf=ceph-osd.target
After=network-online.target local-fs.target time-sync.target
Before=remote-fs-pre.target ceph-osd.target
Wants=network-online.target local-fs.target time-sync.target remote-fs-pre.target ceph-osd.target

[Service]
Environment=CLUSTER=ceph
Expand Down
3 changes: 3 additions & 0 deletions systemd/ceph-radosgw.target
@@ -1,6 +1,9 @@
# Grouping target for the ceph-radosgw@.service instances (see Description=).
[Unit]
Description=ceph target allowing to start/stop all ceph-radosgw@.service instances at once
# Declared as part of ceph.target; per systemd.unit(5), PartOf= propagates
# stop/restart of ceph.target to this unit.
PartOf=ceph.target
# Ordering only: this target is placed after ceph-mon.target and before
# ceph.target.
After=ceph-mon.target
Before=ceph.target
# Weak dependencies: activating this target also requests ceph.target and
# ceph-mon.target.
Wants=ceph.target ceph-mon.target

[Install]
# When enabled, this target is wanted by both multi-user.target and ceph.target.
WantedBy=multi-user.target ceph.target
5 changes: 3 additions & 2 deletions systemd/ceph-radosgw@.service.in
@@ -1,8 +1,9 @@
[Unit]
Description=Ceph rados gateway
After=network-online.target local-fs.target time-sync.target
Wants=network-online.target local-fs.target time-sync.target
PartOf=ceph-radosgw.target
After=network-online.target local-fs.target time-sync.target
Before=remote-fs-pre.target ceph-radosgw.target
Wants=network-online.target local-fs.target time-sync.target remote-fs-pre.target ceph-radosgw.target

[Service]
Environment=CLUSTER=ceph
Expand Down
1 change: 1 addition & 0 deletions systemd/ceph-rbd-mirror.target
Expand Up @@ -2,5 +2,6 @@
Description=ceph target allowing to start/stop all ceph-rbd-mirror@.service instances at once
PartOf=ceph.target
Before=ceph.target

[Install]
WantedBy=multi-user.target ceph.target
1 change: 1 addition & 0 deletions systemd/ceph.target
@@ -1,4 +1,5 @@
# Top-level Ceph target; the per-daemon ceph-*.target units shown on this page
# declare PartOf=ceph.target and Before=ceph.target against it.
[Unit]
Description=ceph target allowing to start/stop all ceph*@.service instances at once

[Install]
# When enabled, this target is wanted by multi-user.target.
WantedBy=multi-user.target
5 changes: 2 additions & 3 deletions systemd/rbdmap.service.in
@@ -1,9 +1,8 @@
[Unit]
Description=Map RBD devices

After=network-online.target
After=network-online.target ceph.target
Before=remote-fs-pre.target
Wants=network-online.target remote-fs-pre.target
Wants=network-online.target remote-fs-pre.target ceph.target

[Service]
EnvironmentFile=-@SYSTEMD_ENV_FILE@
Expand Down

0 comments on commit d88c834

Please sign in to comment.