infrastructure-playbooks/purge-cluster.yml

---
# This playbook purges Ceph
# It removes: packages, configuration files and ALL THE DATA
#
# Use it like this:
# ansible-playbook purge-cluster.yml
#     Prompts for confirmation to purge, defaults to no and
#     doesn't purge the cluster. yes purges the cluster.
#
# ansible-playbook -e ireallymeanit=yes|no purge-cluster.yml
#     Overrides the prompt using -e option. Can be used in
#     automation scripts to avoid interactive prompt.

# Safety gate: the run is aborted unless the operator answered exactly 'yes',
# either at the interactive prompt or through `-e ireallymeanit=yes`.
- name: confirm whether user really meant to purge the cluster
  hosts: localhost
  gather_facts: false

  vars_prompt:
    - name: ireallymeanit
      prompt: Are you sure you want to purge the cluster?
      # quoted so the default stays the string 'no' rather than a boolean
      default: 'no'
      private: false

  tasks:
  # Anything other than the literal string 'yes' stops the playbook here.
  - name: exit playbook, if user did not mean to purge cluster
    fail:
      msg: >
        "Exiting purge-cluster playbook, cluster was NOT purged.
         To purge the cluster, either say 'yes' on the prompt or
         use `-e ireallymeanit=yes` on the command line when
         invoking the playbook"
    when: ireallymeanit != 'yes'

# Collects facts once for every group touched by the later plays, which all
# set `gather_facts: false`. This play does not disable fact gathering, so
# the default gathering behaviour applies.
- name: gather facts on all hosts

  hosts:
    - "{{ mon_group_name|default('mons') }}"
    - "{{ osd_group_name|default('osds') }}"
    - "{{ mds_group_name|default('mdss') }}"
    - "{{ rgw_group_name|default('rgws') }}"
    - "{{ rbdmirror_group_name|default('rbdmirrors') }}"
    - "{{ nfs_group_name|default('nfss') }}"
    - "{{ client_group_name|default('clients') }}"
    # the iscsi gateway play reads ansible_service_mgr without gathering
    # facts itself, so its hosts must be covered here as well
    - "{{ iscsi_gw_group_name|default('iscsi-gw') }}"

  become: true

  tasks:
    - debug:
        msg: "gather facts on all Ceph hosts for following reference"

# Stops the MDS daemon on every host of the mds group, picking the task that
# matches the service manager reported in the gathered facts.
- name: purge ceph mds cluster

  vars:
    mds_group_name: mdss

  hosts:
    - "{{ mds_group_name|default('mdss') }}"

  gather_facts: false # Already gathered previously

  become: true

  tasks:

  - name: stop ceph mdss with systemd
    service:
      name: ceph-mds@{{ ansible_hostname }}
      state: stopped
      enabled: false
    when: ansible_service_mgr == 'systemd'

  # Only stop the daemon when `service ceph status mds` succeeds. The numeric
  # comparison uses `-eq`: `==` inside `[ ]` is a bash extension and errors
  # out under a strict POSIX /bin/sh, which would silently skip the stop.
  - name: stop ceph mdss
    shell: "service ceph status mds ; if [ $? -eq 0 ] ; then service ceph stop mds ; else echo ; fi"
    when: ansible_service_mgr == 'sysvinit'

  # failed_when: false keeps the play going whatever initctl returns.
  - name: stop ceph mdss on ubuntu
    command: initctl stop ceph-mds cluster={{ cluster }} id={{ ansible_hostname }}
    failed_when: false
    when: ansible_service_mgr == 'upstart'

# Stops the RADOS gateway daemon on every host of the rgw group, picking the
# task that matches the service manager reported in the gathered facts.
- name: purge ceph rgw cluster

  vars:
    rgw_group_name: rgws

  hosts:
    - "{{ rgw_group_name|default('rgws') }}"

  gather_facts: false # Already gathered previously

  become: true

  tasks:

  - name: stop ceph rgws with systemd
    service:
      name: ceph-radosgw@rgw.{{ ansible_hostname }}
      state: stopped
      enabled: false
    when: ansible_service_mgr == 'systemd'

  # Only stop the daemon when the status call succeeds. `-eq` is used because
  # `==` inside `[ ]` is a bash extension and errors out under a strict POSIX
  # /bin/sh, which would silently skip the stop.
  - name: stop ceph rgws
    shell: "service ceph-radosgw status ; if [ $? -eq 0 ] ; then service ceph-radosgw stop ; else echo ; fi"
    when: ansible_service_mgr == 'sysvinit'

  # failed_when: false keeps the play going whatever initctl returns.
  - name: stop ceph rgws on ubuntu
    command: initctl stop radosgw cluster={{ cluster }} id={{ ansible_hostname }}
    failed_when: false
    when: ansible_service_mgr == 'upstart'


# Stops the rbd-mirror daemon on every host of the rbd-mirror group.
- name: purge ceph rbd-mirror cluster

  vars:
    # Must match the 'rbdmirrors' default used by every other play in this
    # file; a different name here makes this play target another group than
    # the fact-gathering and final cleanup plays.
    rbdmirror_group_name: rbdmirrors

  hosts:
    - "{{ rbdmirror_group_name|default('rbdmirrors') }}"

  gather_facts: false # Already gathered previously

  become: true

  tasks:

  - name: stop ceph rbd mirror with systemd
    service:
      name: ceph-rbd-mirror@admin.service
      state: stopped
    when: ansible_service_mgr == 'systemd'

  # failed_when: false keeps the play going whatever initctl returns.
  - name: stop ceph rbd mirror on ubuntu
    command: initctl stop ceph-rbd-mirror cluster={{ cluster }} id=admin
    failed_when: false
    when: ansible_service_mgr == 'upstart'


# Stops the nfs-ganesha service on every host of the nfs group, picking the
# task that matches the service manager reported in the gathered facts.
- name: purge ceph nfs cluster

  vars:
    nfs_group_name: nfss

  hosts:
    - "{{ nfs_group_name|default('nfss') }}"

  gather_facts: false # Already gathered previously

  become: true

  tasks:

  - name: stop ceph nfss with systemd
    service:
      name: nfs-ganesha
      state: stopped
    when: ansible_service_mgr == 'systemd'

  # Only stop the service when the status call succeeds. `-eq` is used because
  # `==` inside `[ ]` is a bash extension and errors out under a strict POSIX
  # /bin/sh, which would silently skip the stop.
  - name: stop ceph nfss
    shell: "service nfs-ganesha status ; if [ $? -eq 0 ] ; then service nfs-ganesha stop ; else echo ; fi"
    when: ansible_service_mgr == 'sysvinit'

  # failed_when: false keeps the play going whatever initctl returns.
  - name: stop ceph nfss on ubuntu
    command: initctl stop nfs-ganesha
    failed_when: false
    when: ansible_service_mgr == 'upstart'


# OSD teardown play: its tasks stop the ceph-osd daemons, unmount and remove
# the OSD data tree, then wipe the partitions / logical volumes behind them.
- name: purge ceph osd cluster

  hosts:
    - "{{ osd_group_name|default('osds') }}"

  vars:
    osd_group_name: osds

  become: true

  # facts are expected to be available from an earlier play
  gather_facts: false

  handlers:
  # Launched asynchronously and never polled (async 1 / poll 0); errors are
  # ignored. The shutdown itself is delayed by a 2 second sleep.
  - name: restart machine
    shell: 'sleep 2 && shutdown -r now "Ansible updates triggered"'
    async: 1
    poll: 0
    ignore_errors: true

  # Executed locally and without privilege escalation: waits until port 22 on
  # the target answers, starting after 10s and giving up after 500s.
  - name: wait for server to boot
    become: false
    local_action:
      module: wait_for
      port: 22
      host: "{{ inventory_hostname }}"
      state: started
      delay: 10
      timeout: 500

  # Deletes the whole /var/lib/ceph tree.
  - name: remove data
    file:
      path: /var/lib/ceph
      state: absent

  tasks:

  # Lists the entries of /var/lib/ceph/osd (when the directory exists) and
  # keeps what follows the last '-' of each name; used below as the OSD ids.
  - name: get osd numbers
    shell: "if [ -d /var/lib/ceph/osd ] ; then ls /var/lib/ceph/osd | sed 's/.*-//' ; fi"
    register: osd_ids
    changed_when: false

  - name: stop ceph-osd with systemd
    service:
      name: ceph-osd@{{item}}
      state: stopped
      enabled: false
    with_items: "{{ osd_ids.stdout_lines }}"
    when: ansible_service_mgr == 'systemd'

  # before infernalis release, using sysvinit scripts
  # we use this test so we do not have to know which RPM contains the boot script
  # or where it is placed.
  #
  # The numeric comparison uses `-eq`: `==` inside `[ ]` is a bash extension
  # and errors out under a strict POSIX /bin/sh, which would silently skip
  # the stop.

  - name: stop ceph osds
    shell: "service ceph status osd ; if [ $? -eq 0 ] ; then service ceph stop osd ; else echo ; fi"
    when: ansible_service_mgr == 'sysvinit'

  # failed_when: false keeps the play going whatever initctl returns.
  - name: stop ceph osds on ubuntu
    command: initctl stop ceph-osd cluster={{ cluster }} id={{ item }}
    failed_when: false
    when: ansible_service_mgr == 'upstart'
    with_items: "{{ osd_ids.stdout_lines }}"

  - name: remove ceph udev rules
    file:
      path: "{{ item }}"
      state: absent
    with_items:
      - /usr/lib/udev/rules.d/95-ceph-osd.rules
      - /usr/lib/udev/rules.d/60-ceph-by-parttypeuuid.rules

  # The five probes below only read /dev/disk/by-partlabel. Each one never
  # fails (failed_when: false) and exposes the grep exit status through the
  # registered `.rc`, which later tasks compare against 0. They are marked
  # changed_when: false, like the other read-only probes of this play, so
  # they do not show up as changes in the run summary.
  - name: see if ceph-disk-created data partitions are present
    shell: |
      ls /dev/disk/by-partlabel | grep -q "ceph.*.data"
    failed_when: false
    changed_when: false
    register: ceph_data_partlabels

  - name: see if ceph-disk-created journal partitions are present
    shell: |
      ls /dev/disk/by-partlabel | grep -q "ceph.*.journal"
    failed_when: false
    changed_when: false
    register: ceph_journal_partlabels

  - name: see if ceph-disk-created block db partitions are present
    shell: |
      ls /dev/disk/by-partlabel | grep -q "ceph.*.block.db"
    failed_when: false
    changed_when: false
    register: ceph_db_partlabels

  - name: see if ceph-disk-created block wal partitions are present
    shell: |
      ls /dev/disk/by-partlabel | grep -q "ceph.*.block.wal"
    failed_when: false
    changed_when: false
    register: ceph_wal_partlabels

  - name: see if ceph-disk-created lockbox partitions are present
    shell: |
      ls /dev/disk/by-partlabel | grep -q "ceph.*.lockbox"
    failed_when: false
    changed_when: false
    register: ceph_lockbox_partlabels

#   Initial attempt, doing everything in Ansible...
#  - name: see if encrypted partitions are present
#    shell: blkid -t TYPE=crypto_LUKS -o value -s PARTUUID
#    register: encrypted_partuuid
#
#  - name: find if these encrypted partitions are ceph data partitions
#    shell: blkid -t PARTLABEL="ceph data" -o value -s PARTUUID $(blkid -U {{ item }})
#    failed_when: false
#    with_items: "{{ encrypted_partuuid.stdout_lines }}"
#    when: "{{ encrypted_partuuid | length > 0 }}"
#    register: encrypted_partuuid_ceph_data
#
#  - name: find if these encrypted partitions are ceph journal partitions
#    shell: blkid -t PARTLABEL="ceph journal" -o value -s PARTUUID $(blkid -U {{ item }})
#    failed_when: false
#    with_items: "{{ encrypted_partuuid.stdout_lines }}"
#    when: "{{ encrypted_partuuid | length > 0 }}"
#    register: encrypted_partuuid_ceph_journal
#
#  - name: merge the list of ceph encrypted partitions
#    set_fact:
#      encrypted_partuuid_ceph: "{{ encrypted_partuuid_ceph_data + encrypted_partuuid_ceph_journal }}"

  # NOTE(leseb): hope someone will find a more elegant way one day...
  # Prints the PARTUUID of every crypto_LUKS block device whose blkid output
  # matches "ceph.*."; the list feeds the dm-crypt removal task.
  - name: see if encrypted partitions are present
    shell: |
      blkid -t TYPE=crypto_LUKS -s PARTLABEL -s PARTUUID | grep "ceph.*." | grep -o PARTUUID.* | cut -d '"' -f 2
    register: encrypted_ceph_partuuid

  # Second column of every /proc/mounts line mentioning /var/lib/ceph; the
  # `|| echo -n` keeps the pipeline successful when nothing is mounted.
  - name: get osd data and lockbox mount points
    shell: "(grep /var/lib/ceph /proc/mounts || echo -n) | awk '{ print $2 }'"
    register: mounted_osd
    changed_when: false

  - name: drop all cache
    shell: "sync && sleep 1 && echo 3 > /proc/sys/vm/drop_caches"

  - name: umount osd data partition
    shell: umount {{ item }}
    with_items: "{{ mounted_osd.stdout_lines }}"

  # A failure here is tolerated (ignore_errors) and recorded in the
  # registered result; the next task inspects it.
  - name: remove osd mountpoint tree
    file:
      path: /var/lib/ceph/osd/
      state: absent
    register: remove_osd_mountpoints
    ignore_errors: true

  # Requests the reboot handlers only when the removal above actually failed.
  # The value of `failed` is tested rather than its mere presence: Ansible
  # releases that always populate `failed` on a registered result would
  # otherwise trigger a reboot on every run. default(false) covers releases
  # that omit the key on success.
  - name: is reboot needed
    local_action: shell echo requesting reboot
    become: false
    notify:
      - restart machine
      - wait for server to boot
      - remove data
    when: remove_osd_mountpoints.failed | default(false) | bool

  # Never fails; `.rc` of the registered result tells whether ceph-disk exists.
  - name: see if ceph-disk is installed
    shell: "which ceph-disk"
    failed_when: false
    register: ceph_disk_present

  # `when` is already evaluated as a Jinja expression, so it is written bare;
  # wrapping it in "{{ }}" is deprecated and makes Ansible emit a warning.
  - name: delete dm-crypt devices if any
    command: dmsetup remove --retry --force {{ item }}
    with_items: "{{ encrypted_ceph_partuuid.stdout_lines }}"
    when: encrypted_ceph_partuuid.stdout_lines | length > 0

  # Device path (text before the first ':') of every blkid line containing
  # "ceph data". Only runs when the data partlabel probe returned 0.
  - name: get ceph data partitions
    shell: |
      blkid | awk -F: '/ceph data/ { print $1 }'
    when: ceph_data_partlabels.rc == 0
    failed_when: false
    register: ceph_data_partition_to_erase_path

  # Same idea for "ceph lockbox" lines; the ':' is stripped from field one.
  - name: get ceph lockbox partitions
    shell: |
      blkid | awk '/ceph lockbox/ { sub (":", "", $1); print $1 }'
    when: ceph_lockbox_partlabels.rc == 0
    failed_when: false
    register: ceph_lockbox_partition_to_erase_path

  # For an item ending in one or two digits the partition is deleted with
  # sgdisk on the extracted raw device; anything else is handed to
  # `ceph-disk zap`. Both branches wait for udev to settle afterwards.
  # default([]) covers registered results that carry no stdout_lines.
  - name: zap osd disks
    shell: |
      if (echo "{{ item }}" | grep -Esq '[0-9]{1,2}$'); then
        raw_device=$(echo "{{ item }}" | grep -Eo '/dev/([hsv]d[a-z]{1,2}|cciss/c[0-9]d[0-9]|nvme[0-9]n[0-9]){1,2}')
        partition_nb=$(echo "{{ item }}" | grep -Eo '[0-9]{1,2}$')
        sgdisk --delete $partition_nb $raw_device
        udevadm settle --timeout=600
      else
        ceph-disk zap "{{ item }}"
        udevadm settle --timeout=600
      fi
    with_items:
      - "{{ ceph_data_partition_to_erase_path.stdout_lines | default([]) }}"
      - "{{ ceph_lockbox_partition_to_erase_path.stdout_lines | default([]) }}"
    when:
      - ceph_disk_present.rc == 0
      - ceph_data_partlabels.rc == 0
  # Temporary until 'ceph-volume lvm zap' exists: force-remove the data
  # logical volume of every lvm_volumes entry (lvm scenario only).
  - name: remove osd logical volumes
    when: osd_scenario == "lvm"
    with_items: "{{ lvm_volumes }}"
    command: lvremove -f {{ item.data_vg }}/{{ item.data }}

  # Temporary until 'ceph-volume lvm zap' exists: same for the journals.
  # A journal may be a logical volume or a plain device, so a failing
  # lvremove is deliberately not treated as an error.
  - name: remove osd lvm journals
    when:
      - osd_scenario == "lvm"
      - item.journal_vg is defined
    with_items: "{{ lvm_volumes }}"
    failed_when: false
    command: lvremove -f {{ item.journal_vg }}/{{ item.journal }}
  # The three lookups below print the device path (first blkid field with the
  # ':' removed) of every blkid line matching the given label. Each one runs
  # only when its partlabel probe returned 0 and never fails the play.
  - name: get ceph journal partitions
    shell: |
      blkid | awk '/ceph journal/ { sub (":", "", $1); print $1 }'
    when: ceph_journal_partlabels.rc == 0
    failed_when: false
    register: ceph_journal_partition_to_erase_path

  - name: get ceph db partitions
    shell: |
      blkid | awk '/ceph block.db/ { sub (":", "", $1); print $1 }'
    when: ceph_db_partlabels.rc == 0
    failed_when: false
    register: ceph_db_partition_to_erase_path

  - name: get ceph wal partitions
    shell: |
      blkid | awk '/ceph block.wal/ { sub (":", "", $1); print $1 }'
    when: ceph_wal_partlabels.rc == 0
    failed_when: false
    register: ceph_wal_partition_to_erase_path

  # Deletes each collected journal / block.db / block.wal partition with
  # `sgdisk --delete`, using the trailing one or two digits of the path as the
  # partition number and the leading device match as the raw device.
  # The script first exits 1, with an explanatory message, when the item
  # matches the raw-device pattern and parted's output for it contains "boot".
  # Runs only in the 'non-collocated' scenario and when at least one of the
  # three partlabel probes returned 0. default([]) covers registered results
  # that carry no stdout_lines.
  - name: zap ceph journal/block db/block wal partitions
    shell: |
      # if the disk passed is a raw device AND the boot system disk
      if echo "{{ item }}" | egrep -sq '/dev/([hsv]d[a-z]{1,2}|cciss/c[0-9]d[0-9]p|nvme[0-9]n[0-9]p){1,2}$' && parted -s $(echo "{{ item }}" | egrep -o '/dev/([hsv]d[a-z]{1,2}|cciss/c[0-9]d[0-9]p|nvme[0-9]n[0-9]p){1,2}') print | grep -sq boot; then
        echo "Looks like {{ item }} has a boot partition,"
        echo "if you want to delete specific partitions point to the partition instead of the raw device"
        echo "Do not use your system disk!"
        exit 1
      fi
      raw_device=$(echo "{{ item }}" | egrep -o '/dev/([hsv]d[a-z]{1,2}|cciss/c[0-9]d[0-9]|nvme[0-9]n[0-9]){1,2}')
      partition_nb=$(echo "{{ item }}" | egrep -o '[0-9]{1,2}$')
      sgdisk --delete $partition_nb $raw_device
    with_items:
      - "{{ ceph_journal_partition_to_erase_path.stdout_lines | default([]) }}"
      - "{{ ceph_db_partition_to_erase_path.stdout_lines | default([]) }}"
      - "{{ ceph_wal_partition_to_erase_path.stdout_lines | default([]) }}"
    when:
      - (ceph_journal_partlabels.rc == 0 or ceph_db_partlabels.rc == 0 or ceph_wal_partlabels.rc == 0)
      - osd_scenario == 'non-collocated'

# Stops the monitor daemon on every host of the mon group, then deletes the
# monitor store, the bootstrap key directories and /var/lib/ceph/tmp.
- name: purge ceph mon cluster

  vars:
    mon_group_name:       mons
    restapi_group_name:   restapis

  hosts:
    - "{{ mon_group_name|default('mons') }}"

  gather_facts: false # already gathered previously

  become: true

  tasks:

  - name: stop ceph mons with systemd
    service:
      name: ceph-mon@{{ ansible_hostname }}
      state: stopped
      enabled: false
    when: ansible_service_mgr == 'systemd'

  # Only stop the daemon when `service ceph status mon` succeeds. `-eq` is
  # used because `==` inside `[ ]` is a bash extension and errors out under a
  # strict POSIX /bin/sh, which would silently skip the stop.
  - name: stop ceph mons
    shell: "service ceph status mon ; if [ $? -eq 0 ] ; then service ceph stop mon ; else echo ; fi"
    when: ansible_service_mgr == 'sysvinit'

  # failed_when: false keeps the play going whatever initctl returns.
  - name: stop ceph mons on ubuntu
    command: initctl stop ceph-mon cluster={{ cluster }} id={{ ansible_hostname }}
    failed_when: false
    when: ansible_service_mgr == 'upstart'

  - name: remove monitor store and bootstrap keys
    file:
      path: "{{ item }}"
      state: absent
    with_items:
      - /var/lib/ceph/mon
      - /var/lib/ceph/bootstrap-mds
      - /var/lib/ceph/bootstrap-osd
      - /var/lib/ceph/bootstrap-rgw
      - /var/lib/ceph/bootstrap-rbd
      - /var/lib/ceph/bootstrap-mgr
      - /var/lib/ceph/tmp


# Purges the iSCSI gateway configuration through the igw_purge module and
# restarts rbd-target-gw on systemd hosts.
- name: purge iscsi gateway(s)

  hosts:
    - "{{ iscsi_gw_group_name|default('iscsi-gw') }}"

  become: true

  # No fact gathering in this play although ansible_service_mgr is read below.
  # NOTE(review): confirm this group is covered by an earlier gathering play.
  gather_facts: false

  vars:
    # 'all' additionally deletes the configured rbd devices (second task)
    igw_purge_type: all

  tasks:

  - name: igw_purge | purging the gateway configuration
    igw_purge:
      mode: gateway

  - name: igw_purge | deleting configured rbd devices
    igw_purge:
      mode: disks
    when: igw_purge_type == 'all'

  - name: restart rbd-target-gw daemons
    service:
      name: rbd-target-gw
      state: restarted
    when: ansible_service_mgr == 'systemd'


# Last pass over every Ceph group: packages, configuration, logs, caches and
# finally the data directory (through the "remove data" handlers).
- name: final cleanup - check any running ceph, purge ceph packages, purge config and remove data

  hosts:
    - "{{ mon_group_name|default('mons') }}"
    - "{{ osd_group_name|default('osds') }}"
    - "{{ mds_group_name|default('mdss') }}"
    - "{{ rgw_group_name|default('rgws') }}"
    - "{{ rbdmirror_group_name|default('rbdmirrors') }}"
    - "{{ nfs_group_name|default('nfss') }}"
    - "{{ client_group_name|default('clients') }}"

  become: true

  # facts are expected to be available from an earlier play
  gather_facts: false

  vars:
    # true => ceph_remaining_packages are purged in addition to
    # ceph_packages. Beware: this can cause problems with qemu-kvm.
    purge_all_packages: true

    # always purged
    ceph_packages:
      - ceph
      - ceph-common
      - ceph-fs-common
      - ceph-fuse
      - ceph-mds
      - ceph-release
      - ceph-radosgw
      - calamari-server

    # purged only when purge_all_packages is true
    ceph_remaining_packages:
      - libcephfs1
      - libcephfs2
      - librados2
      - libradosstriper1
      - librbd1
      - python-cephfs
      - python-rados
      - python-rbd

  handlers:
  # All three handlers subscribe to the "remove data" topic: find what is
  # mounted below /var/lib/ceph, unmount it, then delete the tree.
  - name: get osd data and lockbox mount points
    listen: "remove data"
    shell: "(grep /var/lib/ceph /proc/mounts || echo -n) | awk '{ print $2 }'"
    register: mounted_osd
    changed_when: false

  - name: umount osd data partition
    listen: "remove data"
    shell: umount {{ item }}
    with_items: "{{ mounted_osd.stdout_lines }}"

  - name: remove data
    listen: "remove data"
    file:
      path: /var/lib/ceph
      state: absent

  tasks:

  # Main package set: removed with whichever package manager the facts report.
  - name: purge ceph packages with yum
    yum:
      name: "{{ item }}"
      state: absent
    with_items: "{{ ceph_packages }}"
    when: ansible_pkg_mgr == 'yum'

  - name: purge ceph packages with dnf
    dnf:
      name: "{{ item }}"
      state: absent
    with_items: "{{ ceph_packages }}"
    when: ansible_pkg_mgr == 'dnf'

  - name: purge ceph packages with apt
    apt:
      name: "{{ item }}"
      state: absent
    with_items: "{{ ceph_packages }}"
    when: ansible_pkg_mgr == 'apt'

  # Remaining libraries: removed only when purge_all_packages is truthy.
  # The `bool` filter is used instead of `== true` so that a value supplied
  # as a string (e.g. `-e purge_all_packages=true`) is honoured as well.
  - name: purge remaining ceph packages with yum
    yum:
      name: "{{ item }}"
      state: absent
    with_items: "{{ ceph_remaining_packages }}"
    when:
      - ansible_pkg_mgr == 'yum'
      - purge_all_packages | bool

  - name: purge remaining ceph packages with dnf
    dnf:
      name: "{{ item }}"
      state: absent
    with_items: "{{ ceph_remaining_packages }}"
    when:
      - ansible_pkg_mgr == 'dnf'
      - purge_all_packages | bool

  - name: purge remaining ceph packages with apt
    apt:
      name: "{{ item }}"
      state: absent
    with_items: "{{ ceph_remaining_packages }}"
    when:
      - ansible_pkg_mgr == 'apt'
      - purge_all_packages | bool

  # Configuration directory.
  - name: remove config
    file:
      state: absent
      path: /etc/ceph

  # Log directory.
  - name: remove logs
    file:
      state: absent
      path: /var/log/ceph

  # A local, unprivileged no-op whose only purpose is to notify the
  # "remove data" handlers.
  - name: request data removal
    become: false
    local_action: shell echo requesting data removal
    notify:
      - remove data

  - name: purge dnf cache
    when: ansible_pkg_mgr == 'dnf'
    command: dnf clean all

  - name: purge rpm cache in /tmp
    file:
      state: absent
      path: /tmp/rh-storage-repo

  # Also empties /tmp and /var/tmp on apt-based hosts.
  - name: clean apt
    when: ansible_pkg_mgr == 'apt'
    shell: apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*

  - name: purge rh_storage.repo file in /etc/yum.repos.d
    when: ansible_os_family == 'RedHat'
    file:
      state: absent
      path: /etc/yum.repos.d/rh_storage.repo

  # Inverted check: the task fails when ps exits 0, i.e. when it still finds
  # processes for user ceph.
  - name: check for anything running ceph
    command: ps -u ceph -U ceph
    register: check_for_running_ceph
    failed_when: check_for_running_ceph.rc == 0

  # Never reported as a change.
  - name: remove ceph systemd unit files
    when: ansible_service_mgr == 'systemd'
    changed_when: false
    shell: rm -rf /etc/systemd/system/ceph*


# Runs on the control node only: deletes the local fetch directory.
- name: purge fetch directory
  hosts: localhost
  gather_facts: false

  tasks:

  # Fall back to "fetch/" when the caller did not provide fetch_directory.
  - name: set fetch_directory value if not set
    when: fetch_directory is not defined
    set_fact:
      fetch_directory: "fetch/"

  - name: purge fetch directory for localhost
    file:
      state: absent
      path: "{{ fetch_directory }}"