diff --git a/src/include/rados.h b/src/include/rados.h index 3691a2ceadb02..f070fd89f8ea8 100644 --- a/src/include/rados.h +++ b/src/include/rados.h @@ -410,6 +410,8 @@ enum { pool uses pool snaps */ CEPH_OSD_FLAG_REDIRECTED = 0x200000, /* op has been redirected */ CEPH_OSD_FLAG_KNOWN_REDIR = 0x400000, /* redirect bit is authoritative */ + CEPH_OSD_FLAG_FULL_TRY = 0x800000, /* try op despite full flag */ + CEPH_OSD_FLAG_FULL_FORCE = 0x1000000, /* force op despite full flag */ }; enum { diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc index b13925c6ea0f6..11c039b790e22 100644 --- a/src/osd/osd_types.cc +++ b/src/osd/osd_types.cc @@ -50,6 +50,8 @@ const char *ceph_osd_flag_name(unsigned flag) case CEPH_OSD_FLAG_ENFORCE_SNAPC: return "enforce_snapc"; case CEPH_OSD_FLAG_REDIRECTED: return "redirected"; case CEPH_OSD_FLAG_KNOWN_REDIR: return "known_if_redirected"; + case CEPH_OSD_FLAG_FULL_TRY: return "full_try"; + case CEPH_OSD_FLAG_FULL_FORCE: return "full_force"; default: return "???"; } } diff --git a/src/osdc/Objecter.cc b/src/osdc/Objecter.cc index 95700eb109bc3..7b48d9e454160 100644 --- a/src/osdc/Objecter.cc +++ b/src/osdc/Objecter.cc @@ -898,7 +898,8 @@ bool Objecter::ms_dispatch(Message *m) void Objecter::_scan_requests(OSDSession *s, bool force_resend, - bool force_resend_writes, + bool cluster_full, + map *pool_full_map, map& need_resend, list& need_resend_linger, map& need_resend_command) @@ -918,8 +919,10 @@ void Objecter::_scan_requests(OSDSession *s, assert(op->session == s); ++lp; // check_linger_pool_dne() may touch linger_ops; prevent iterator invalidation ldout(cct, 10) << " checking linger op " << op->linger_id << dendl; - bool unregister; + bool unregister, force_resend_writes = cluster_full; int r = _recalc_linger_op_target(op, lc); + if (pool_full_map) + force_resend_writes = force_resend_writes || (*pool_full_map)[op->target.base_oloc.pool]; switch (r) { case RECALC_OP_TARGET_NO_ACTION: if (!force_resend && !force_resend_writes) @@ -947,6 +950,9 @@ void Objecter::_scan_requests(OSDSession *s, Op *op = p->second; ++p; // check_op_pool_dne() may touch ops; prevent iterator invalidation ldout(cct, 10) << " checking op " << op->tid << dendl; + bool force_resend_writes = cluster_full; + if (pool_full_map) + force_resend_writes = force_resend_writes || (*pool_full_map)[op->target.base_oloc.pool]; int r = _calc_target(&op->target, &op->last_force_resend); switch (r) { case RECALC_OP_TARGET_NO_ACTION: @@ -973,6 +979,9 @@ void Objecter::_scan_requests(OSDSession *s, CommandOp *c = cp->second; ++cp; ldout(cct, 10) << " checking command " << c->tid << dendl; + bool force_resend_writes = cluster_full; + if (pool_full_map) + force_resend_writes = force_resend_writes || (*pool_full_map)[c->target_pg.pool()]; int r = _calc_command_target(c); switch (r) { case RECALC_OP_TARGET_NO_ACTION: @@ -1020,9 +1029,14 @@ void Objecter::handle_osd_map(MOSDMap *m) } bool was_pauserd = osdmap->test_flag(CEPH_OSDMAP_PAUSERD); - bool was_full = _osdmap_full_flag(); - bool was_pausewr = osdmap->test_flag(CEPH_OSDMAP_PAUSEWR) || was_full; + bool cluster_full = _osdmap_full_flag(); + bool was_pausewr = osdmap->test_flag(CEPH_OSDMAP_PAUSEWR) || cluster_full || _osdmap_has_pool_full(); + map pool_full_map; + for (map::const_iterator it = osdmap->get_pools().begin(); + it != osdmap->get_pools().end(); it++) + pool_full_map[it->first] = it->second.has_flag(pg_pool_t::FLAG_FULL); + list need_resend_linger; map need_resend; map need_resend_command; @@ -1073,18 +1087,19 @@ void Objecter::handle_osd_map(MOSDMap *m) } logger->set(l_osdc_map_epoch, osdmap->get_epoch()); - was_full = was_full || _osdmap_full_flag(); - _scan_requests(homeless_session, skipped_map, was_full, - need_resend, need_resend_linger, - need_resend_command); + cluster_full = cluster_full || _osdmap_full_flag(); + update_pool_full_map(pool_full_map); + _scan_requests(homeless_session, skipped_map, cluster_full, + &pool_full_map, need_resend, + need_resend_linger, need_resend_command); // osd addr changes? for (map::iterator p = osd_sessions.begin(); p != osd_sessions.end(); ) { OSDSession *s = p->second; - _scan_requests(s, skipped_map, was_full, - need_resend, need_resend_linger, - need_resend_command); + _scan_requests(s, skipped_map, cluster_full, + &pool_full_map, need_resend, + need_resend_linger, need_resend_command); ++p; if (!osdmap->is_up(s->osd) || (s->con && @@ -1102,14 +1117,14 @@ void Objecter::handle_osd_map(MOSDMap *m) for (map::iterator p = osd_sessions.begin(); p != osd_sessions.end(); ++p) { OSDSession *s = p->second; - _scan_requests(s, false, false, need_resend, need_resend_linger, - need_resend_command); + _scan_requests(s, false, false, NULL, need_resend, + need_resend_linger, need_resend_command); } ldout(cct, 3) << "handle_osd_map decoding full epoch " << m->get_last() << dendl; osdmap->decode(m->maps[m->get_last()]); - _scan_requests(homeless_session, false, false, + _scan_requests(homeless_session, false, false, NULL, need_resend, need_resend_linger, need_resend_command); } else { @@ -1122,7 +1137,7 @@ void Objecter::handle_osd_map(MOSDMap *m) } bool pauserd = osdmap->test_flag(CEPH_OSDMAP_PAUSERD); - bool pausewr = osdmap->test_flag(CEPH_OSDMAP_PAUSEWR) || _osdmap_full_flag(); + bool pausewr = osdmap->test_flag(CEPH_OSDMAP_PAUSEWR) || _osdmap_full_flag() || _osdmap_has_pool_full(); // was/is paused? if (was_pauserd || was_pausewr || pauserd || pausewr || osdmap->get_epoch() < epoch_barrier) { @@ -2146,16 +2161,23 @@ ceph_tid_t Objecter::_op_submit(Op *op, RWLock::Context& lc) if ((op->target.flags & CEPH_OSD_FLAG_WRITE) && osdmap->test_flag(CEPH_OSDMAP_PAUSEWR)) { - ldout(cct, 10) << " paused modify " << op << " tid " << last_tid.read() << dendl; + ldout(cct, 10) << " paused modify " << op << " tid " << last_tid.read() + << dendl; op->target.paused = true; _maybe_request_map(); } else if ((op->target.flags & CEPH_OSD_FLAG_READ) && osdmap->test_flag(CEPH_OSDMAP_PAUSERD)) { - ldout(cct, 10) << " paused read " << op << " tid " << last_tid.read() << dendl; + ldout(cct, 10) << " paused read " << op << " tid " << last_tid.read() + << dendl; op->target.paused = true; _maybe_request_map(); - } else if ((op->target.flags & CEPH_OSD_FLAG_WRITE) && _osdmap_full_flag()) { - ldout(cct, 0) << " FULL, paused modify " << op << " tid " << last_tid.read() << dendl; + } else if ((op->target.flags & (CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_RWORDERED)) && + !(op->target.flags & (CEPH_OSD_FLAG_FULL_TRY | + CEPH_OSD_FLAG_FULL_FORCE)) && + (_osdmap_full_flag() || + _osdmap_pool_full(op->target.base_oloc.pool))) { + ldout(cct, 0) << " FULL, paused modify " << op << " tid " << last_tid.read() + << dendl; op->target.paused = true; _maybe_request_map(); } else if (!s->is_homeless()) { @@ -2357,8 +2379,9 @@ bool Objecter::is_pg_changed( bool Objecter::target_should_be_paused(op_target_t *t) { + const pg_pool_t *pi = osdmap->get_pg_pool(t->base_oloc.pool); bool pauserd = osdmap->test_flag(CEPH_OSDMAP_PAUSERD); - bool pausewr = osdmap->test_flag(CEPH_OSDMAP_PAUSEWR) || _osdmap_full_flag(); + bool pausewr = osdmap->test_flag(CEPH_OSDMAP_PAUSEWR) || _osdmap_full_flag() || pi->has_flag(pg_pool_t::FLAG_FULL); return (t->flags & CEPH_OSD_FLAG_READ && pauserd) || (t->flags & CEPH_OSD_FLAG_WRITE && pausewr) || @@ -2375,6 +2398,38 @@ bool Objecter::osdmap_full_flag() const return _osdmap_full_flag(); } +bool Objecter::osdmap_pool_full(const int64_t pool_id) const +{ + RWLock::RLocker rl(rwlock); + + if (_osdmap_full_flag()) { + return true; + } + + return _osdmap_pool_full(pool_id); +} + +bool Objecter::_osdmap_pool_full(const int64_t pool_id) const +{ + const pg_pool_t *pool = osdmap->get_pg_pool(pool_id); + if (pool == NULL) { + ldout(cct, 4) << __func__ << ": DNE pool " << pool_id << dendl; + return false; + } + + return pool->has_flag(pg_pool_t::FLAG_FULL); +} + +bool Objecter::_osdmap_has_pool_full() const +{ + for (map::const_iterator it = osdmap->get_pools().begin(); + it != osdmap->get_pools().end(); it++) { + if (it->second.has_flag(pg_pool_t::FLAG_FULL)) + return true; + } + return false; +} + /** * Wrapper around osdmap->test_flag for special handling of the FULL flag. */ @@ -2384,6 +2439,17 @@ bool Objecter::_osdmap_full_flag() const return osdmap->test_flag(CEPH_OSDMAP_FULL) && honor_osdmap_full; } +void Objecter::update_pool_full_map(map& pool_full_map) +{ + for (map::const_iterator it = osdmap->get_pools().begin(); + it != osdmap->get_pools().end(); it++) { + if (pool_full_map.find(it->first) == pool_full_map.end()) { + pool_full_map[it->first] = it->second.has_flag(pg_pool_t::FLAG_FULL); + } else { + pool_full_map[it->first] = it->second.has_flag(pg_pool_t::FLAG_FULL) || pool_full_map[it->first]; + } + } +} int64_t Objecter::get_object_hash_position(int64_t pool, const string& key, const string& ns) diff --git a/src/osdc/Objecter.h b/src/osdc/Objecter.h index c72c9b792083e..48893494ffd0b 100644 --- a/src/osdc/Objecter.h +++ b/src/osdc/Objecter.h @@ -1711,6 +1711,16 @@ class Objecter : public md_config_obs_t, public Dispatcher { bool osdmap_full_flag() const; + /** + * Test pg_pool_t::FLAG_FULL on a pool + * + * @return true if the pool exists and has the flag set, or + * the global full flag is set, else false + */ + bool osdmap_pool_full(const int64_t pool_id) const; + bool _osdmap_pool_full(const int64_t pool_id) const; + void update_pool_full_map(map& pool_full_map); + private: map linger_ops; // we use this just to confirm a cookie is valid before dereferencing the ptr @@ -1756,6 +1766,7 @@ class Objecter : public md_config_obs_t, public Dispatcher { RECALC_OP_TARGET_OSD_DOWN, }; bool _osdmap_full_flag() const; + bool _osdmap_has_pool_full() const; bool target_should_be_paused(op_target_t *op); int _calc_target(op_target_t *t, epoch_t *last_force_resend=0, bool any_change=false); @@ -1919,7 +1930,8 @@ class Objecter : public md_config_obs_t, public Dispatcher { void _scan_requests(OSDSession *s, bool force_resend, - bool force_resend_writes, + bool cluster_full, + map *pool_full_map, map& need_resend, list& need_resend_linger, map& need_resend_command);