Skip to content

Commit

Permalink
Merge pull request #19351 from ceph/wip-rm22281
Browse files Browse the repository at this point in the history
ceph-volume rollback on failed OSD prepare/create

Reviewed-by: Andrew Schoen <aschoen@redhat.com>
  • Loading branch information
andrewschoen committed Dec 6, 2017
2 parents ef7359d + 07be6fe commit ef1b266
Show file tree
Hide file tree
Showing 3 changed files with 80 additions and 12 deletions.
36 changes: 36 additions & 0 deletions src/ceph-volume/ceph_volume/devices/lvm/common.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,43 @@
from ceph_volume.util import arg_validators
from ceph_volume import process
from ceph_volume import terminal
import argparse


def rollback_osd(args, osd_id=None):
"""
When the process of creating or preparing fails, the OSD needs to be either
purged (ID fully removed) or destroyed (ID persists). This is important
because otherwise it would leave the ID around, which can cause confusion
if relying on the automatic (OSD.N + 1) behavior.
When the OSD id is specified in the command line explicitly (with
``--osd-id``) , the the ID is then preserved with a soft removal (``ceph
osd destroy``), otherwise it is fully removed with ``purge``.
"""
if not osd_id:
# it means that it wasn't generated, so there is nothing to rollback here
return

# once here, this is an error condition that needs to be rolled back
terminal.error('Was unable to complete a new OSD, will rollback changes')
osd_name = 'osd.%s'
if args.osd_id is None:
terminal.error('OSD will be fully purged from the cluster, because the ID was generated')
# the ID wasn't passed in explicitly, so make sure it is fully removed
process.run([
'ceph', 'osd', 'purge',
osd_name % osd_id,
'--yes-i-really-mean-it'])
else:
terminal.error('OSD will be destroyed, keeping the ID because it was provided with --osd-id')
# the ID was passed explicitly, so allow to re-use by using `destroy`
process.run([
'ceph', 'osd', 'destroy',
osd_name % args.osd_id,
'--yes-i-really-mean-it'])


def common_parser(prog, description):
"""
Both prepare and create share the same parser, those are defined here to
Expand Down
22 changes: 18 additions & 4 deletions src/ceph-volume/ceph_volume/devices/lvm/create.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
from __future__ import print_function
from textwrap import dedent
import logging
from ceph_volume.util import system
from ceph_volume import decorators
from .common import create_parser
from .common import create_parser, rollback_osd
from .prepare import Prepare
from .activate import Activate

logger = logging.getLogger(__name__)


class Create(object):

Expand All @@ -18,8 +21,19 @@ def __init__(self, argv):
def create(self, args):
if not args.osd_fsid:
args.osd_fsid = system.generate_uuid()
Prepare([]).prepare(args)
Activate([]).activate(args)
prepare_step = Prepare([])
prepare_step.safe_prepare(args)
osd_id = prepare_step.osd_id
try:
# we try this for activate only when 'creating' an OSD, because a rollback should not
# happen when doing normal activation. For example when starting an OSD, systemd will call
# activate, which would never need to be rolled back.
Activate([]).activate(args)
except Exception:
logger.error('lvm activate was unable to complete, while creating the OSD')
logger.info('will rollback OSD ID creation')
rollback_osd(args, osd_id)
raise

def main(self):
sub_command_help = dedent("""
Expand Down Expand Up @@ -52,6 +66,6 @@ def main(self):
args = parser.parse_args(self.argv)
# Default to bluestore here since defaulting it in add_argument may
# cause both to be True
if args.bluestore is None and args.filestore is None:
if not args.bluestore and not args.filestore:
args.bluestore = True
self.create(args)
34 changes: 26 additions & 8 deletions src/ceph-volume/ceph_volume/devices/lvm/prepare.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
from __future__ import print_function
import json
import logging
import uuid
from textwrap import dedent
from ceph_volume.util import prepare as prepare_utils
from ceph_volume.util import system, disk
from ceph_volume import conf, decorators, terminal
from ceph_volume.api import lvm as api
from .common import prepare_parser
from .common import prepare_parser, rollback_osd


logger = logging.getLogger(__name__)


def prepare_filestore(device, journal, secrets, id_=None, fsid=None):
Expand Down Expand Up @@ -79,6 +83,7 @@ class Prepare(object):

def __init__(self, argv):
self.argv = argv
self.osd_id = None

def get_ptuuid(self, argument):
uuid = disk.get_partuuid(argument)
Expand Down Expand Up @@ -161,6 +166,19 @@ def prepare_device(self, arg, device_type, cluster_fsid, osd_fsid):

raise RuntimeError('no data logical volume found with: %s' % arg)

def safe_prepare(self, args):
"""
An intermediate step between `main()` and `prepare()` so that we can
capture the `self.osd_id` in case we need to rollback
"""
try:
self.prepare(args)
except Exception:
logger.error('lvm prepare was unable to complete')
logger.info('will rollback OSD ID creation')
rollback_osd(args, self.osd_id)
raise

@decorators.needs_root
def prepare(self, args):
# FIXME we don't allow re-using a keyring, we always generate one for the
Expand All @@ -172,7 +190,7 @@ def prepare(self, args):
cluster_fsid = conf.ceph.get('global', 'fsid')
osd_fsid = args.osd_fsid or system.generate_uuid()
# allow re-using an id, in case a prepare failed
osd_id = args.osd_id or prepare_utils.create_id(osd_fsid, json.dumps(secrets))
self.osd_id = args.osd_id or prepare_utils.create_id(osd_fsid, json.dumps(secrets))
if args.filestore:
if not args.journal:
raise RuntimeError('--journal is required when using --filestore')
Expand All @@ -183,7 +201,7 @@ def prepare(self, args):

tags = {
'ceph.osd_fsid': osd_fsid,
'ceph.osd_id': osd_id,
'ceph.osd_id': self.osd_id,
'ceph.cluster_fsid': cluster_fsid,
'ceph.cluster_name': conf.cluster,
'ceph.data_device': data_lv.lv_path,
Expand All @@ -199,7 +217,7 @@ def prepare(self, args):
data_lv.lv_path,
journal_device,
secrets,
id_=osd_id,
id_=self.osd_id,
fsid=osd_fsid,
)
elif args.bluestore:
Expand All @@ -209,7 +227,7 @@ def prepare(self, args):

tags = {
'ceph.osd_fsid': osd_fsid,
'ceph.osd_id': osd_id,
'ceph.osd_id': self.osd_id,
'ceph.cluster_fsid': cluster_fsid,
'ceph.cluster_name': conf.cluster,
'ceph.block_device': block_lv.lv_path,
Expand All @@ -227,7 +245,7 @@ def prepare(self, args):
wal_device,
db_device,
secrets,
id_=osd_id,
id_=self.osd_id,
fsid=osd_fsid,
)

Expand Down Expand Up @@ -283,6 +301,6 @@ def main(self):
args = parser.parse_args(self.argv)
# Default to bluestore here since defaulting it in add_argument may
# cause both to be True
if args.bluestore is None and args.filestore is None:
if not args.bluestore and not args.filestore:
args.bluestore = True
self.prepare(args)
self.safe_prepare(args)

0 comments on commit ef1b266

Please sign in to comment.