Skip to content


rolling_update: unmask monitor service after a failure
Browse files Browse the repository at this point in the history
If for some reason the playbook fails after the service was
stopped, disabled and masked but before it got restarted, enabled and
unmasked, the playbook leaves the service masked, which can confuse
users and forces them to unmask the unit manually.


Signed-off-by: Guillaume Abrioux <>
(cherry picked from commit 07029e1)
  • Loading branch information
guits committed Mar 29, 2021
1 parent 653d180 commit 82b934c
Showing 1 changed file with 153 additions and 133 deletions.
286 changes: 153 additions & 133 deletions infrastructure-playbooks/rolling_update.yml
Original file line number Diff line number Diff line change
Expand Up @@ -118,150 +118,170 @@
serial: 1
become: True
- name: remove ceph aliases
path: /etc/profile.d/
state: absent
when: containerized_deployment | bool

- name: set mon_host_count
mon_host_count: "{{ groups[mon_group_name] | length }}"

- name: fail when less than three monitors
msg: "Upgrade of cluster with less than three monitors is not supported."
when: mon_host_count | int < 3

- name: select a running monitor
mon_host: "{{ groups[mon_group_name] | difference([inventory_hostname]) | last }}"
- name: upgrade ceph mon cluster
- name: upgrade ceph mon cluster
- name: remove ceph aliases
path: /etc/profile.d/
state: absent
when: containerized_deployment | bool

- import_role:
name: ceph-defaults
- import_role:
name: ceph-facts
- name: set mon_host_count
mon_host_count: "{{ groups[mon_group_name] | length }}"

- block:
- name: get ceph cluster status
command: "{{ container_exec_cmd }} ceph --cluster {{ cluster }} health -f json"
register: check_cluster_health
delegate_to: "{{ mon_host }}"
- name: fail when less than three monitors
msg: "Upgrade of cluster with less than three monitors is not supported."
when: mon_host_count | int < 3

- block:
- name: display ceph health detail
command: "{{ container_exec_cmd }} ceph --cluster {{ cluster }} health detail"
delegate_to: "{{ mon_host }}"
- name: select a running monitor
mon_host: "{{ groups[mon_group_name] | difference([inventory_hostname]) | last }}"

- import_role:
name: ceph-defaults
- import_role:
name: ceph-facts

- block:
- name: get ceph cluster status
command: "{{ container_exec_cmd }} ceph --cluster {{ cluster }} health -f json"
register: check_cluster_health
delegate_to: "{{ mon_host }}"

- block:
- name: display ceph health detail
command: "{{ container_exec_cmd }} ceph --cluster {{ cluster }} health detail"
delegate_to: "{{ mon_host }}"

- name: fail if cluster isn't in an acceptable state
msg: "cluster is not in an acceptable state!"
when: (check_cluster_health.stdout | from_json).status == 'HEALTH_ERR'
when: inventory_hostname == groups[mon_group_name] | first

- name: ensure /var/lib/ceph/bootstrap-rbd-mirror is present
path: /var/lib/ceph/bootstrap-rbd-mirror
owner: "{{ ceph_uid if containerized_deployment else 'ceph' }}"
group: "{{ ceph_uid if containerized_deployment else 'ceph' }}"
mode: '755'
state: directory
delegate_to: "{{ item }}"
with_items: "{{ groups[mon_group_name] }}"
- cephx | bool
- inventory_hostname == groups[mon_group_name][0]

- name: create potentially missing keys (rbd and rbd-mirror)
name: "client.{{ item.0 }}"
dest: "/var/lib/ceph/{{ item.0 }}/"
mon: "allow profile {{ item.0 }}"
cluster: "{{ cluster }}"
delegate_to: "{{ item.1 }}"
- ['bootstrap-rbd', 'bootstrap-rbd-mirror']
- "{{ groups[mon_group_name] }}" # so the key goes on all the nodes
CEPH_CONTAINER_IMAGE: "{{ ceph_docker_registry + '/' + ceph_docker_image + ':' + ceph_docker_image_tag if containerized_deployment else None }}"
CEPH_CONTAINER_BINARY: "{{ container_binary }}"
- cephx | bool
- inventory_hostname == groups[mon_group_name][0]

- name: fail if cluster isn't in an acceptable state
msg: "cluster is not in an acceptable state!"
when: (check_cluster_health.stdout | from_json).status == 'HEALTH_ERR'
when: inventory_hostname == groups[mon_group_name] | first

- name: ensure /var/lib/ceph/bootstrap-rbd-mirror is present
path: /var/lib/ceph/bootstrap-rbd-mirror
owner: "{{ ceph_uid if containerized_deployment else 'ceph' }}"
group: "{{ ceph_uid if containerized_deployment else 'ceph' }}"
mode: '755'
state: directory
delegate_to: "{{ item }}"
with_items: "{{ groups[mon_group_name] }}"
- cephx | bool
- inventory_hostname == groups[mon_group_name][0]

- name: create potentially missing keys (rbd and rbd-mirror)
name: "client.{{ item.0 }}"
dest: "/var/lib/ceph/{{ item.0 }}/"
mon: "allow profile {{ item.0 }}"
cluster: "{{ cluster }}"
delegate_to: "{{ item.1 }}"
- ['bootstrap-rbd', 'bootstrap-rbd-mirror']
- "{{ groups[mon_group_name] }}" # so the key goes on all the nodes
CEPH_CONTAINER_IMAGE: "{{ ceph_docker_registry + '/' + ceph_docker_image + ':' + ceph_docker_image_tag if containerized_deployment else None }}"
CEPH_CONTAINER_BINARY: "{{ container_binary }}"
- cephx | bool
- inventory_hostname == groups[mon_group_name][0]
# NOTE: we mask the service so the RPM can't restart it
# after the package gets upgraded
- name: stop ceph mon - shortname
name: ceph-mon@{{ ansible_facts['hostname'] }}
state: stopped
enabled: no
masked: yes
ignore_errors: True

# NOTE: we mask the service so the RPM can't restart it
# after the package gets upgraded
- name: stop ceph mon - shortname
name: ceph-mon@{{ ansible_facts['hostname'] }}
state: stopped
enabled: no
masked: yes
ignore_errors: True
# NOTE: we mask the service so the RPM can't restart it
# after the package gets upgraded
- name: stop ceph mon - fqdn
name: ceph-mon@{{ ansible_facts['fqdn'] }}
state: stopped
enabled: no
masked: yes
ignore_errors: True

# NOTE: we mask the service so the RPM can't restart it
# after the package gets upgraded
- name: stop ceph mon - fqdn
name: ceph-mon@{{ ansible_facts['fqdn'] }}
state: stopped
enabled: no
masked: yes
ignore_errors: True
# only mask the service for mgr because it must be upgraded
# after ALL monitors, even when collocated
- name: mask the mgr service
name: ceph-mgr@{{ ansible_facts['hostname'] }}
masked: yes
when: inventory_hostname in groups[mgr_group_name] | default([])
or groups[mgr_group_name] | default([]) | length == 0

# only mask the service for mgr because it must be upgraded
# after ALL monitors, even when collocated
- name: mask the mgr service
name: ceph-mgr@{{ ansible_facts['hostname'] }}
masked: yes
when: inventory_hostname in groups[mgr_group_name] | default([])
or groups[mgr_group_name] | default([]) | length == 0
- import_role:
name: ceph-handler
- import_role:
name: ceph-common
when: not containerized_deployment | bool
- import_role:
name: ceph-container-common
when: containerized_deployment | bool
- import_role:
name: ceph-config
- import_role:
name: ceph-mon

- import_role:
name: ceph-handler
- import_role:
name: ceph-common
when: not containerized_deployment | bool
- import_role:
name: ceph-container-common
when: containerized_deployment | bool
- import_role:
name: ceph-config
- import_role:
name: ceph-mon
- name: start ceph mgr
name: ceph-mgr@{{ ansible_facts['hostname'] }}
state: started
enabled: yes
ignore_errors: True # if no mgr collocated with mons

- name: non container | waiting for the monitor to join the quorum...
command: ceph --cluster "{{ cluster }}" -m "{{ hostvars[groups[mon_group_name][0]]['_current_monitor_address'] }}" quorum_status --format json
register: ceph_health_raw
- ceph_health_raw.rc == 0
- (hostvars[inventory_hostname]['ansible_facts']['hostname'] in (ceph_health_raw.stdout | default('{}') | from_json)["quorum_names"] or
hostvars[inventory_hostname]['ansible_facts']['fqdn'] in (ceph_health_raw.stdout | default('{}') | from_json)["quorum_names"])
retries: "{{ health_mon_check_retries }}"
delay: "{{ health_mon_check_delay }}"
when: not containerized_deployment | bool

- name: start ceph mgr
name: ceph-mgr@{{ ansible_facts['hostname'] }}
state: started
enabled: yes
ignore_errors: True # if no mgr collocated with mons

- name: non container | waiting for the monitor to join the quorum...
command: ceph --cluster "{{ cluster }}" -m "{{ hostvars[groups[mon_group_name][0]]['_current_monitor_address'] }}" quorum_status --format json
register: ceph_health_raw
- ceph_health_raw.rc == 0
- (hostvars[inventory_hostname]['ansible_facts']['hostname'] in (ceph_health_raw.stdout | default('{}') | from_json)["quorum_names"] or
hostvars[inventory_hostname]['ansible_facts']['fqdn'] in (ceph_health_raw.stdout | default('{}') | from_json)["quorum_names"])
retries: "{{ health_mon_check_retries }}"
delay: "{{ health_mon_check_delay }}"
when: not containerized_deployment | bool
- name: container | waiting for the containerized monitor to join the quorum...
command: >
{{ container_binary }} exec ceph-mon-{{ ansible_facts['hostname'] }} ceph --cluster "{{ cluster }}" -m "{{ hostvars[groups[mon_group_name][0]]['_current_monitor_address'] }}" quorum_status --format json
register: ceph_health_raw
- ceph_health_raw.rc == 0
- (hostvars[inventory_hostname]['ansible_facts']['hostname'] in (ceph_health_raw.stdout | default('{}') | from_json)["quorum_names"] or
hostvars[inventory_hostname]['ansible_facts']['fqdn'] in (ceph_health_raw.stdout | default('{}') | from_json)["quorum_names"])
retries: "{{ health_mon_check_retries }}"
delay: "{{ health_mon_check_delay }}"
when: containerized_deployment | bool

- name: container | waiting for the containerized monitor to join the quorum...
command: >
{{ container_binary }} exec ceph-mon-{{ ansible_facts['hostname'] }} ceph --cluster "{{ cluster }}" -m "{{ hostvars[groups[mon_group_name][0]]['_current_monitor_address'] }}" quorum_status --format json
register: ceph_health_raw
- ceph_health_raw.rc == 0
- (hostvars[inventory_hostname]['ansible_facts']['hostname'] in (ceph_health_raw.stdout | default('{}') | from_json)["quorum_names"] or
hostvars[inventory_hostname]['ansible_facts']['fqdn'] in (ceph_health_raw.stdout | default('{}') | from_json)["quorum_names"])
retries: "{{ health_mon_check_retries }}"
delay: "{{ health_mon_check_delay }}"
when: containerized_deployment | bool
- name: unmask the mon service
name: ceph-mon@{{ item }}
enabled: yes
masked: no
- "{{ ansible_facts['hostname'] }}"
- "{{ ansible_facts['fqdn'] }}"

- name: unmask the mgr service
name: ceph-mgr@{{ ansible_facts['hostname'] }}
masked: no
when: inventory_hostname in groups[mgr_group_name] | default([])
or groups[mgr_group_name] | default([]) | length == 0

- name: reset mon_host
hosts: "{{ mon_group_name|default('mons') }}"
Expand Down

0 comments on commit 82b934c

Please sign in to comment.