-
Notifications
You must be signed in to change notification settings - Fork 1k
/
shrink-mon.yml
147 lines (123 loc) · 5.25 KB
/
shrink-mon.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
---
# This playbook shrinks the Ceph monitors from your cluster
# It can remove a Ceph of monitor from the cluster and ALL ITS DATA
#
# Use it like this:
# ansible-playbook shrink-mon.yml -e mon_to_kill=ceph-mon01
# Prompts for confirmation to shrink, defaults to no and
# doesn't shrink the cluster. yes shrinks the cluster.
#
# ansible-playbook -e ireallymeanit=yes|no shrink-mon.yml
# Overrides the prompt using -e option. Can be used in
# automation scripts to avoid interactive prompt.
- name: gather facts and check the init system
hosts: "{{ mon_group_name|default('mons') }}"
become: true
tasks:
- debug: msg="gather facts on all Ceph hosts for following reference"
- name: confirm whether user really meant to remove monitor from the ceph cluster
hosts: mons[0]
become: true
vars_prompt:
- name: ireallymeanit
prompt: Are you sure you want to shrink the cluster?
default: 'no'
private: no
vars:
mon_group_name: mons
pre_tasks:
- name: exit playbook, if only one monitor is present in cluster
fail:
msg: "You are about to shrink the only monitor present in the cluster.
If you really want to do that, please use the purge-cluster playbook."
when: groups[mon_group_name] | length | int == 1
- name: exit playbook, if no monitor was given
fail:
msg: "mon_to_kill must be declared
Exiting shrink-cluster playbook, no monitor was removed.
On the command line when invoking the playbook, you can use
-e mon_to_kill=ceph-mon01 argument. You can only remove a single monitor each time the playbook runs."
when: mon_to_kill is not defined
- name: exit playbook, if the monitor is not part of the inventory
fail:
msg: "It seems that the host given is not part of your inventory, please make sure it is."
when: mon_to_kill not in groups[mon_group_name]
- name: exit playbook, if user did not mean to shrink cluster
fail:
msg: "Exiting shrink-mon playbook, no monitor was removed.
To shrink the cluster, either say 'yes' on the prompt or
or use `-e ireallymeanit=yes` on the command line when
invoking the playbook"
when: ireallymeanit != 'yes'
- import_role:
name: ceph-defaults
- import_role:
name: ceph-facts
tasks_from: container_binary
tasks:
- name: pick a monitor different than the one we want to remove
set_fact:
mon_host: "{{ item }}"
with_items: "{{ groups[mon_group_name] }}"
when: item != mon_to_kill
- name: "set_fact container_exec_cmd build {{ container_binary }} exec command (containerized)"
set_fact:
container_exec_cmd: "{{ container_binary }} exec ceph-mon-{{ hostvars[mon_host]['ansible_facts']['hostname'] }}"
when: containerized_deployment | bool
- name: exit playbook, if can not connect to the cluster
command: "{{ container_exec_cmd }} timeout 5 ceph --cluster {{ cluster }} health"
register: ceph_health
changed_when: false
until: ceph_health.stdout.find("HEALTH") > -1
delegate_to: "{{ mon_host }}"
retries: 5
delay: 2
- name: set_fact mon_to_kill_hostname
set_fact:
mon_to_kill_hostname: "{{ hostvars[mon_to_kill]['ansible_facts']['hostname'] }}"
- name: stop monitor service(s)
service:
name: ceph-mon@{{ mon_to_kill_hostname }}
state: stopped
enabled: no
delegate_to: "{{ mon_to_kill }}"
failed_when: false
- name: purge monitor store
file:
path: /var/lib/ceph/mon/{{ cluster }}-{{ mon_to_kill_hostname }}
state: absent
delegate_to: "{{ mon_to_kill }}"
- name: remove monitor from the quorum
command: "{{ container_exec_cmd }} ceph --cluster {{ cluster }} mon remove {{ mon_to_kill_hostname }}"
changed_when: false
failed_when: false
delegate_to: "{{ mon_host }}"
post_tasks:
- name: verify the monitor is out of the cluster
command: "{{ container_exec_cmd }} ceph --cluster {{ cluster }} quorum_status -f json"
delegate_to: "{{ mon_host }}"
changed_when: false
failed_when: false
register: result
until: mon_to_kill_hostname not in (result.stdout | from_json)['quorum_names']
retries: 2
delay: 10
- name: please remove the monitor from your ceph configuration file
debug:
msg: "The monitor has been successfully removed from the cluster.
Please remove the monitor entry from the rest of your ceph configuration files, cluster wide."
run_once: true
when: mon_to_kill_hostname not in (result.stdout | from_json)['quorum_names']
- name: fail if monitor is still part of the cluster
fail:
msg: "Monitor appears to still be part of the cluster, please check what happened."
run_once: true
when: mon_to_kill_hostname in (result.stdout | from_json)['quorum_names']
- name: show ceph health
command: "{{ container_exec_cmd }} ceph --cluster {{ cluster }} -s"
delegate_to: "{{ mon_host }}"
changed_when: false
- name: show ceph mon status
command: "{{ container_exec_cmd }} ceph --cluster {{ cluster }} mon stat"
delegate_to: "{{ mon_host }}"
changed_when: false