Skip to content

Commit

Permalink
Merge pull request #12882 from wonzhq/pglog-fixes
Browse files Browse the repository at this point in the history
osd: pglog trimming fixes

Reviewed-by: Sage Weil <sage@redhat.com>
  • Loading branch information
liewegas committed May 2, 2017
2 parents fcd64d7 + 7c41d4c commit 5610444
Show file tree
Hide file tree
Showing 6 changed files with 62 additions and 10 deletions.
1 change: 1 addition & 0 deletions src/common/config_opts.h
Expand Up @@ -857,6 +857,7 @@ OPTION(osd_pg_epoch_persisted_max_stale, OPT_U32, 150) // make this < map_cache_

OPTION(osd_min_pg_log_entries, OPT_U32, 3000) // number of entries to keep in the pg log when trimming it
OPTION(osd_max_pg_log_entries, OPT_U32, 10000) // max entries, say when degraded, before we trim
OPTION(osd_force_recovery_pg_log_entries_factor, OPT_FLOAT, 1.3) // multiplier on osd_max_pg_log_entries: once a degraded/recovering/backfilling PG's log reaches that many entries, recovery of its oldest missing object is forced
OPTION(osd_pg_log_trim_min, OPT_U32, 100)
OPTION(osd_op_complaint_time, OPT_FLOAT, 30) // how many seconds old makes an op complaint-worthy
OPTION(osd_command_max_records, OPT_INT, 256)
Expand Down
6 changes: 2 additions & 4 deletions src/osd/OSD.cc
Expand Up @@ -8400,10 +8400,8 @@ void OSD::handle_pg_trim(OpRequestRef op)
dout(10) << *pg << " replica osd." << from << " lcod " << m->trim_to << dendl;
pg->peer_last_complete_ondisk[pg_shard_t(from, m->pgid.shard)] =
m->trim_to;
if (pg->calc_min_last_complete_ondisk()) {
dout(10) << *pg << " min lcod now " << pg->min_last_complete_ondisk << dendl;
pg->trim_peers();
}
// trim log when the pg is recovered
pg->calc_min_last_complete_ondisk();
} else {
// primary is instructing us to trim
ObjectStore::Transaction t;
Expand Down
12 changes: 10 additions & 2 deletions src/osd/PG.cc
Expand Up @@ -3020,12 +3020,13 @@ void PG::write_if_dirty(ObjectStore::Transaction& t)
t.omap_setkeys(coll, pgmeta_oid, km);
}

void PG::trim_peers()
void PG::trim_log()
{
assert(is_primary());
calc_trim_to();
dout(10) << "trim_peers " << pg_trim_to << dendl;
dout(10) << __func__ << " to " << pg_trim_to << dendl;
if (pg_trim_to != eversion_t()) {
// inform peers to trim log
assert(!actingbackfill.empty());
for (set<pg_shard_t>::iterator i = actingbackfill.begin();
i != actingbackfill.end();
Expand All @@ -3039,6 +3040,10 @@ void PG::trim_peers()
pg_trim_to),
get_osdmap()->get_epoch());
}

// trim primary as well
pg_log.trim(pg_trim_to, info);
dirty_info = true;
}
}

Expand Down Expand Up @@ -6814,6 +6819,9 @@ PG::RecoveryState::Recovered::Recovered(my_context ctx)
pg->publish_stats_to_osd();
}

// trim pglog on recovered
pg->trim_log();

// adjust acting set? (e.g. because backfill completed...)
bool history_les_bound = false;
if (pg->acting != pg->up && !pg->choose_acting(auth_log_shard,
Expand Down
2 changes: 1 addition & 1 deletion src/osd/PG.h
Expand Up @@ -2284,7 +2284,7 @@ class PG : public DoutPrefixProvider {
ObjectStore::Transaction &t,
bool transaction_applied = true);
bool check_log_for_corruption(ObjectStore *store);
void trim_peers();
void trim_log();

std::string get_corrupt_pg_log_name() const;
static int read_info(
Expand Down
49 changes: 46 additions & 3 deletions src/osd/PrimaryLogPG.cc
Expand Up @@ -677,6 +677,48 @@ void PrimaryLogPG::wait_for_blocked_object(const hobject_t& soid, OpRequestRef o
op->mark_delayed("waiting for blocked object");
}

// Kick recovery of the oldest missing object once the pg log has grown
// too large.
//
// Returns without doing anything unless the PG is degraded or in one of the
// recovering/backfill states, and the approximate log size has reached
// osd_max_pg_log_entries * osd_force_recovery_pg_log_entries_factor.
// Otherwise it picks the missing object with the lowest version, looking at
// the primary's own missing set and the missing set of every other shard in
// actingbackfill, and passes it to maybe_kick_recovery().
void PrimaryLogPG::maybe_force_recovery()
{
  // no force if not in degraded/recovery/backfill states
  if (!is_degraded() &&
      !state_test(PG_STATE_RECOVERING |
                  PG_STATE_RECOVERY_WAIT |
                  PG_STATE_BACKFILL |
                  PG_STATE_BACKFILL_WAIT |
                  PG_STATE_BACKFILL_TOOFULL))
    return;

  if (pg_log.get_log().approx_size() <
      cct->_conf->osd_max_pg_log_entries *
        cct->_conf->osd_force_recovery_pg_log_entries_factor)
    return;

  // find the oldest missing object
  //
  // min_version is only meaningful once `found` is set.  It used to double
  // as its own "nothing yet" marker with the value 0, but version_t is
  // unsigned, so "min_version > x" could never be true and objects missing
  // only on peers were never selected when the primary was missing nothing.
  bool found = false;
  version_t min_version = 0;
  hobject_t soid;
  if (!pg_log.get_missing().get_items().empty()) {
    min_version = pg_log.get_missing().get_rmissing().begin()->first;
    soid = pg_log.get_missing().get_rmissing().begin()->second;
    found = true;
  }
  assert(!actingbackfill.empty());
  for (set<pg_shard_t>::iterator it = actingbackfill.begin();
       it != actingbackfill.end();
       ++it) {
    if (*it == get_primary()) continue;
    pg_shard_t peer = *it;
    // skip peers we have no missing info for, or that miss nothing
    if (!peer_missing.count(peer) ||
        peer_missing[peer].get_items().empty())
      continue;
    // first candidate wins unconditionally; afterwards keep the lowest version
    if (!found ||
        min_version > peer_missing[peer].get_rmissing().begin()->first) {
      min_version = peer_missing[peer].get_rmissing().begin()->first;
      soid = peer_missing[peer].get_rmissing().begin()->second;
      found = true;
    }
  }

  // recover it
  if (soid != hobject_t())
    maybe_kick_recovery(soid);
}

class PGLSPlainFilter : public PGLSFilter {
string val;
public:
Expand Down Expand Up @@ -2238,6 +2280,9 @@ void PrimaryLogPG::do_op(OpRequestRef& op)
} else if (op->may_write() || op->may_cache()) {
osd->logger->tinc(l_osd_op_w_prepare_lat, prepare_latency);
}

// force recovery of the oldest missing object if too many logs
maybe_force_recovery();
}

void PrimaryLogPG::record_write_error(OpRequestRef op, const hobject_t &soid,
Expand Down Expand Up @@ -9754,9 +9799,7 @@ void PrimaryLogPG::_committed_pushed_object(
last_complete_ondisk),
get_osdmap()->get_epoch());
} else {
// we are the primary. tell replicas to trim?
if (calc_min_last_complete_ondisk())
trim_peers();
calc_min_last_complete_ondisk();
}
}

Expand Down
2 changes: 2 additions & 0 deletions src/osd/PrimaryLogPG.h
Expand Up @@ -1722,6 +1722,8 @@ class PrimaryLogPG : public PG, public PGBackend::Listener {
void wait_for_blocked_object(const hobject_t& soid, OpRequestRef op);
void kick_object_context_blocked(ObjectContextRef obc);

void maybe_force_recovery();

void mark_all_unfound_lost(
int what,
ConnectionRef con,
Expand Down

0 comments on commit 5610444

Please sign in to comment.