Skip to content

Commit

Permalink
osd: do not send ENXIO on misdirected op by default
Browse files Browse the repository at this point in the history
In practice this tends to get bubbled up the stack as an error to the
caller, which usually does not handle it properly.  For example,
with librbd, this turns into EIO and breaks the VM.

Instead, a misdirected op will now manifest as a hung op on the client.
That is also not ideal, but given that the root cause here is generally a
bug, it's not clear what else would be better.

We already log an error in the cluster log, so teuthology runs will
continue to fail.

Signed-off-by: Sage Weil <sage@redhat.com>
(cherry picked from commit 923e7f5)

Conflicts:
	PendingReleaseNotes
	src/common/config_opts.h
  • Loading branch information
liewegas authored and shinobu-x committed Feb 17, 2017
1 parent 83af8cd commit 5bb778b
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 33 deletions.
20 changes: 0 additions & 20 deletions PendingReleaseNotes

This file was deleted.

5 changes: 5 additions & 0 deletions src/common/config_opts.h
Expand Up @@ -668,6 +668,11 @@ OPTION(osd_debug_verify_stray_on_activate, OPT_BOOL, false)
OPTION(osd_debug_skip_full_check_in_backfill_reservation, OPT_BOOL, false)
OPTION(osd_debug_reject_backfill_probability, OPT_DOUBLE, 0)
OPTION(osd_debug_inject_copyfrom_error, OPT_BOOL, false) // inject failure during copyfrom completion

OPTION(osd_debug_randomize_hobject_sort_order, OPT_BOOL, false)
OPTION(osd_debug_misdirected_ops, OPT_BOOL, false)
OPTION(osd_enxio_on_misdirected_op, OPT_BOOL, false) // reply with -ENXIO to a misdirected op; when false, no error reply is sent
OPTION(osd_debug_verify_cached_snaps, OPT_BOOL, false)
OPTION(osd_enable_op_tracker, OPT_BOOL, true) // enable/disable OSD op tracking
OPTION(osd_num_op_tracker_shard, OPT_U32, 32) // The number of shards for holding the ops
OPTION(osd_op_history_size, OPT_U32, 20) // Max number of completed ops to track
Expand Down
31 changes: 18 additions & 13 deletions src/osd/OSD.cc
Expand Up @@ -1268,11 +1268,14 @@ void OSDService::handle_misdirected_op(PG *pg, OpRequestRef op)

dout(7) << *pg << " misdirected op in " << m->get_map_epoch() << dendl;
clog->warn() << m->get_source_inst() << " misdirected " << m->get_reqid()
<< " pg " << m->get_pg()
<< " to osd." << whoami
<< " not " << pg->acting
<< " in e" << m->get_map_epoch() << "/" << osdmap->get_epoch() << "\n";
reply_op_error(op, -ENXIO);
<< " pg " << m->get_pg()
<< " to osd." << whoami
<< " not " << pg->acting
<< " in e" << m->get_map_epoch() << "/" << osdmap->get_epoch()
<< "\n";
if (g_conf->osd_enxio_on_misdirected_op) {
reply_op_error(op, -ENXIO);
}
}


Expand Down Expand Up @@ -8285,14 +8288,16 @@ void OSD::handle_op(OpRequestRef& op, OSDMapRef& osdmap)
if (!send_map->osd_is_valid_op_target(pgid.pgid, whoami)) {
dout(7) << "we are invalid target" << dendl;
clog->warn() << m->get_source_inst() << " misdirected " << m->get_reqid()
<< " pg " << m->get_pg()
<< " to osd." << whoami
<< " in e" << osdmap->get_epoch()
<< ", client e" << m->get_map_epoch()
<< " pg " << pgid
<< " features " << m->get_connection()->get_features()
<< "\n";
service.reply_op_error(op, -ENXIO);
<< " pg " << m->get_pg()
<< " to osd." << whoami
<< " in e" << osdmap->get_epoch()
<< ", client e" << m->get_map_epoch()
<< " pg " << pgid
<< " features " << m->get_connection()->get_features()
<< "\n";
if (g_conf->osd_enxio_on_misdirected_op) {
service.reply_op_error(op, -ENXIO);
}
return;
}

Expand Down

0 comments on commit 5bb778b

Please sign in to comment.