Add handlers for containerized deployment
Until now, there were no handlers for containerized deployments.

Signed-off-by: Guillaume Abrioux <gabrioux@redhat.com>
guits committed Aug 1, 2017
1 parent df7f934 commit 656c427
Showing 11 changed files with 143 additions and 79 deletions.
15 changes: 0 additions & 15 deletions roles/ceph-common/tasks/checks/check_socket.yml

This file was deleted.

1 change: 0 additions & 1 deletion roles/ceph-common/tasks/main.yml
@@ -89,7 +89,6 @@
static: False

- include: facts.yml
- include: ./checks/check_socket.yml
- include: create_ceph_initial_dirs.yml
- include: generate_ceph_conf.yml
- include: create_rbd_client_dir.yml
38 changes: 0 additions & 38 deletions roles/ceph-common/templates/restart_osd_daemon.sh.j2

This file was deleted.

2 changes: 1 addition & 1 deletion roles/ceph-defaults/defaults/main.yml
@@ -375,7 +375,7 @@ os_tuning_params:
##########
# DOCKER #
##########

docker_exec_cmd:
docker: false
ceph_docker_image: "ceph/daemon"
ceph_docker_image_tag: latest

@@ -17,39 +17,51 @@
- name: restart ceph mon daemon(s)
command: /tmp/restart_mon_daemon.sh
listen: "restart ceph mons"

when:
# We do not want to run these checks on initial deployment (`socket.rc == 0`)
- socket.rc == 0
- ceph_current_fsid.rc == 0
- mon_group_name in group_names

# This does not just restart OSDs but everything else too. Unfortunately
# at this time the ansible role does not have an OSD id list to use
# for restarting them specifically.
- block:
- name: copy osd restart script
template:
src: restart_osd_daemon.sh.j2
dest: /tmp/restart_osd_daemon.sh
owner: root
group: root
mode: 0750
listen: "restart ceph osds"
- name: copy osd restart script
template:
src: restart_osd_daemon.sh.j2
dest: /tmp/restart_osd_daemon.sh
owner: root
group: root
mode: 0750
listen: "restart ceph osds"
when:
- inventory_hostname in play_hosts
- osd_group_name in group_names

- name: restart ceph osds daemon(s)
command: /tmp/restart_osd_daemon.sh
listen: "restart ceph osds"
when: handler_health_osd_check
- name: restart containerized ceph osds daemon(s)
command: /tmp/restart_osd_daemon.sh
listen: "restart ceph osds"
with_items: "{{ socket_osd_container.results }}"
when:
# We do not want to run these checks on initial deployment (`socket_osd_container.results[n].rc == 0`)
# except when a crush location is specified. ceph-disk will start the osds before the osd crush location is specified
- ((crush_location is defined and crush_location) or item.get('rc') == 0)
- handler_health_osd_check
# See https://github.com/ceph/ceph-ansible/issues/1457 for the condition below
- inventory_hostname in play_hosts
- osd_group_name in group_names

- name: restart non-containerized ceph osds daemon(s)
command: /tmp/restart_osd_daemon.sh
listen: "restart ceph osds"
when:
# We do not want to run these checks on initial deployment (`socket.rc == 0`)
# except when a crush location is specified. ceph-disk will start the osds before the osd crush location is specified
# We do not want to run these checks on initial deployment (`socket_osd_container.results[n].rc == 0`)
# except when a crush location is specified. ceph-disk will start the osds before the osd crush location is specified
- ((crush_location is defined and crush_location) or socket.rc == 0)
- ceph_current_fsid.rc == 0
- osd_group_name in group_names
# See https://github.com/ceph/ceph-ansible/issues/1457 for the condition below
- handler_health_osd_check
# See https://github.com/ceph/ceph-ansible/issues/1457 for the condition below
- inventory_hostname in play_hosts
- osd_group_name in group_names

- name: restart ceph mdss
service:
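The net effect of the OSD handler changes above is that restarts are now gated per deployment type: the non-containerized handler keys off the single registered `socket` check, while the containerized handler iterates over `socket_osd_container.results`, one entry per device, and only runs when that device's socket check exited 0 (or a crush_location is set). As a rough, hedged illustration of the per-item gate, not part of the commit, in shell terms it amounts to something like this (the rc values are made up):

# Conceptual sketch only: one rc per device from socket_osd_container.results;
# the restart script is triggered only where rc == 0.
for rc in 0 1; do               # e.g. sda's socket was found (0), sdb's was not (1)
  if [ "$rc" -eq 0 ]; then
    /tmp/restart_osd_daemon.sh   # handler fires for this device's OSD
  fi
done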
21 changes: 21 additions & 0 deletions roles/ceph-defaults/tasks/check_socket.yml
@@ -0,0 +1,21 @@
---
# These checks are used to avoid running handlers at initial deployment.
- name: check for a ceph socket
shell: |
{{ docker_exec_cmd }} bash -c 'stat {{ rbd_client_admin_socket_path }}/*.asok > /dev/null 2>&1'
changed_when: false
failed_when: false
always_run: true
register: socket

- name: check for a ceph socket in containerized deployment (osds)
shell: |
docker exec ceph-osd-"{{ ansible_hostname }}"-"{{ item | replace('/', '') }}" bash -c 'stat /var/run/ceph/*.asok > /dev/null 2>&1'
changed_when: false
failed_when: false
always_run: true
register: socket_osd_container
with_items: "{{ devices }}"
when:
- containerized_deployment
- inventory_hostname in groups.get(osd_group_name)
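These two tasks replace the socket check that was deleted from roles/ceph-common above. The first is prefixed with `docker_exec_cmd`, which now defaults to an empty string in roles/ceph-defaults/defaults/main.yml, so on a non-containerized host it runs directly on the node; the second probes one OSD container per entry in `devices`. A hedged sketch of roughly what the rendered commands look like, assuming a made-up hostname `osd0`, the device `/dev/sda`, and the default socket path `/var/run/ceph`:

# Illustrative only: approximate rendered forms of the two checks above.
# Non-containerized (docker_exec_cmd is empty), so the stat runs on the host:
bash -c 'stat /var/run/ceph/*.asok > /dev/null 2>&1'

# Containerized OSD check, run once per device; the '/' characters are stripped
# from the device name, so /dev/sda on host osd0 targets container ceph-osd-osd0-devsda:
docker exec ceph-osd-osd0-devsda bash -c 'stat /var/run/ceph/*.asok > /dev/null 2>&1'

# A non-zero exit code means no admin socket yet (initial deployment), and that
# registered rc is what the restart handlers test before doing anything.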
1 change: 1 addition & 0 deletions roles/ceph-defaults/tasks/main.yml
@@ -1,2 +1,3 @@
---
- include: facts.yml
- include: check_socket.yml

@@ -9,15 +9,15 @@ SOCKET=/var/run/ceph/${CLUSTER}-mon.${MONITOR_NAME}.asok

check_quorum() {
while [ $RETRIES -ne 0 ]; do
MEMBERS=$(ceph --cluster ${CLUSTER} -s --format json | sed -r 's/.*"quorum_names":(\[[^]]+\]).*/\1/')
MEMBERS=$({{ docker_exec_cmd }} ceph --cluster ${CLUSTER} -s --format json | sed -r 's/.*"quorum_names":(\[[^]]+\]).*/\1/')
test "${MEMBERS/$MONITOR_NAME}" != "$MEMBERS" && exit 0
sleep $DELAY
let RETRIES=RETRIES-1
done
# If we reach this point, it means there is a problem with the quorum
echo "Error with quorum."
echo "cluster status:"
ceph --cluster ${CLUSTER} -s
{{ docker_exec_cmd }} ceph --cluster ${CLUSTER} -s
exit 1
}

@@ -27,7 +27,7 @@ systemctl restart ceph-mon@${MONITOR_NAME}
COUNT=10
# Wait and ensure the socket exists after restarting the daemon
while [ $COUNT -ne 0 ]; do
test -S $SOCKET && check_quorum
{{ docker_exec_cmd }} test -S $SOCKET && check_quorum
sleep 1
let COUNT=COUNT-1
done
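Only the `{{ docker_exec_cmd }}` prefix is new in this script; with the empty default it renders away and the monitor restart behaves exactly as before on non-containerized hosts. The quorum test relies on bash substring removal: `${MEMBERS/$MONITOR_NAME}` deletes the monitor name from the extracted quorum list, so the comparison only differs when the monitor really is in quorum. A small standalone illustration with made-up values:

# Illustrative only: how the quorum membership test behaves.
MONITOR_NAME=mon0
MEMBERS='["mon0","mon1","mon2"]'   # what the sed pulls out of `ceph -s --format json`
test "${MEMBERS/$MONITOR_NAME}" != "$MEMBERS" && echo "mon0 is in quorum"

MEMBERS='["mon1","mon2"]'          # mon0 absent: removal changes nothing, test fails
test "${MEMBERS/$MONITOR_NAME}" != "$MEMBERS" || echo "mon0 not in quorum yet"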
79 changes: 79 additions & 0 deletions roles/ceph-defaults/templates/restart_osd_daemon.sh.j2
@@ -0,0 +1,79 @@
#!/bin/bash

RETRIES="{{ handler_health_osd_check_retries }}"
DELAY="{{ handler_health_osd_check_delay }}"
CEPH_CLI="--name client.bootstrap-osd --keyring /var/lib/ceph/bootstrap-osd/{{ cluster }}.keyring --cluster {{ cluster }}"

check_pgs() {
while [ $RETRIES -ne 0 ]; do
test "[""$($docker_exec ceph $CEPH_CLI -s -f json | python -c 'import sys, json; print(json.load(sys.stdin)["pgmap"]["num_pgs"])')""]" = "$($docker_exec ceph $CEPH_CLI -s -f json | python -c 'import sys, json; print [ i["count"] for i in json.load(sys.stdin)["pgmap"]["pgs_by_state"] if i["state_name"] == "active+clean"]')"
RET=$?
test $RET -eq 0 && return 0
sleep $DELAY
let RETRIES=RETRIES-1
done
# PGs not clean, exiting with return code 1
echo "Error while running 'ceph $CEPH_CLI -s', PGs were not reported as active+clean"
echo "It is possible that the cluster has less OSDs than the replica configuration"
echo "Will refuse to continue"
$docker_exec ceph $CEPH_CLI -s
exit 1
}

wait_for_socket_in_docker() {
docker exec $1 timeout 10 bash -c "while [ ! -e /var/run/ceph/*.asok ]; do sleep 1 ; done"
#TODO(guits): What if socket never shows up?
}

get_dev_name() {
# Turn a unit name like ceph-osd@sda.service into the bare device name (sda)
local dev_name
dev_name=$(echo $1 | sed -r 's/ceph-osd@([a-z]{1,4})\.service/\1/')
echo "$dev_name"
}

get_docker_id_from_dev_name() {
local id
local count
count=10
while [ $count -ne 0 ]; do
id=$(docker ps -q -f "name=$1")
test "$id" != "" && break
sleep 1
let count=count-1
done
echo "$id"
}

get_docker_osd_id() {
# wait_for_socket_in_docker $(get_docker_id_from_dev_name $1)
# echo $(docker exec $(get_docker_id_from_dev_name $1) ls /var/run/ceph | cut -d'.' -f2)
wait_for_socket_in_docker $1
echo $(docker exec $1 ls /var/run/ceph | cut -d'.' -f2)
}

# For containerized deployments, the unit file looks like: ceph-osd@sda.service
# For non-containerized deployments, the unit file looks like: ceph-osd@0.service
for unit in $(systemctl list-units | grep -oE "ceph-osd@([0-9]{1,2}|[a-z]+).service"); do
# First, restart daemon(s)
systemctl restart ${unit}
# We need to wait because it may take some time for the socket to actually exist
COUNT=10
# Wait and ensure the socket exists after restarting the daemon
{% if not containerized_deployment -%}
osd_id=$(echo ${unit#ceph-osd@} | grep -oE '[0-9]{1,2}')
{% else %}
id=$(get_dev_name $unit)
container_id=$(get_docker_id_from_dev_name $id)
osd_id=$(get_docker_osd_id $container_id)
docker_exec="docker exec $container_id"
{% endif %}
SOCKET=/var/run/ceph/{{ cluster }}-osd.${osd_id}.asok
while [ $COUNT -ne 0 ]; do
# $docker_exec bash -c "test -S $SOCKET" && check_pgs && continue 2
$docker_exec test -S $SOCKET && check_pgs && continue 2
sleep 1
let COUNT=COUNT-1
done
# If we reach this point, it means the socket is not present.
echo "Socket file ${SOCKET} could not be found, which means the osd daemon is not running."
exit 1
done
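The dense one-liner in check_pgs() compares the cluster's total PG count against the number of PGs reported as active+clean, and only succeeds when the two match. Purely as a hedged illustration, not what the commit ships, the same comparison could be written more readably like this, reusing the script's $docker_exec and $CEPH_CLI variables:

# Illustrative rewrite of the comparison in check_pgs(); not part of this commit.
total_pgs=$($docker_exec ceph $CEPH_CLI -s -f json | python -c 'import sys, json; print(json.load(sys.stdin)["pgmap"]["num_pgs"])')
clean_pgs=$($docker_exec ceph $CEPH_CLI -s -f json | python -c 'import sys, json; d = json.load(sys.stdin); print(sum(i["count"] for i in d["pgmap"]["pgs_by_state"] if i["state_name"] == "active+clean"))')
# The cluster is considered healthy only when every single PG is active+clean.
test "$total_pgs" -eq "$clean_pgs"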
8 changes: 7 additions & 1 deletion roles/ceph-docker-common/tasks/create_configs.yml
@@ -44,10 +44,16 @@
config_type: ini
when:
- (not mon_containerized_default_ceph_conf_with_kv and
(inventory_hostname in groups.get(mon_group_name, []))) or
(inventory_hostname in groups.get(mon_group_name, []) or inventory_hostname in groups.get(osd_group_name, []))) or
(not mon_containerized_default_ceph_conf_with_kv and
((groups.get(nfs_group_name, []) | length > 0)
and (inventory_hostname == groups.get(nfs_group_name, [])[0])))
notify:
- restart ceph mons
- restart ceph osds
- restart ceph mdss
- restart ceph rgws
- restart ceph nfss

- name: set fsid fact when generate_fsid = true
set_fact:
1 change: 0 additions & 1 deletion roles/ceph-mon/defaults/main.yml
@@ -108,7 +108,6 @@ openstack_keys:
##########
# DOCKER #
##########
docker_exec_cmd:
ceph_mon_docker_subnet: "{{ public_network }}" # subnet of the monitor_interface

# ceph_mon_docker_extra_env:
