diff --git a/src/osd/PG.cc b/src/osd/PG.cc index 60a604ae29fd6..121fc8034b4cc 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -3165,33 +3165,6 @@ void PG::write_if_dirty(ObjectStore::Transaction& t) t.omap_setkeys(coll, pgmeta_oid, km); } -void PG::trim_log() -{ - assert(is_primary()); - calc_trim_to(); - dout(10) << __func__ << " to " << pg_trim_to << dendl; - if (pg_trim_to != eversion_t()) { - // inform peers to trim log - assert(!actingbackfill.empty()); - for (set::iterator i = actingbackfill.begin(); - i != actingbackfill.end(); - ++i) { - if (*i == pg_whoami) continue; - osd->send_message_osd_cluster( - i->osd, - new MOSDPGTrim( - get_osdmap()->get_epoch(), - spg_t(info.pgid.pgid, i->shard), - pg_trim_to), - get_osdmap()->get_epoch()); - } - - // trim primary as well - pg_log.trim(pg_trim_to, info); - dirty_info = true; - } -} - void PG::add_log_entry(const pg_log_entry_t& e, bool applied) { // raise last_complete only if we were previously up to date @@ -3275,7 +3248,14 @@ void PG::append_log( roll_forward_to)); } - pg_log.trim(trim_to, info); + dout(10) << __func__ << " approx pg log length = " + << pg_log.get_log().approx_size() << dendl; + dout(10) << __func__ << " transaction_applied = " + << transaction_applied << dendl; + if (!transaction_applied) + dout(10) << __func__ << " " << pg_whoami + << " is backfill target" << dendl; + pg_log.trim(trim_to, info, transaction_applied); // update the local pg, pg log dirty_info = true; @@ -7344,9 +7324,6 @@ PG::RecoveryState::Recovered::Recovered(my_context ctx) pg->publish_stats_to_osd(); } - // trim pglog on recovered - pg->trim_log(); - // adjust acting set? (e.g. because backfill completed...) bool history_les_bound = false; if (pg->acting != pg->up && !pg->choose_acting(auth_log_shard, diff --git a/src/osd/PG.h b/src/osd/PG.h index 932dc51a181c3..c628aba2fe8d7 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -2548,7 +2548,6 @@ class PG : public DoutPrefixProvider { ObjectStore::Transaction &t, bool transaction_applied = true); bool check_log_for_corruption(ObjectStore *store); - void trim_log(); std::string get_corrupt_pg_log_name() const; static int read_info( diff --git a/src/osd/PGLog.cc b/src/osd/PGLog.cc index 96f49fd9d8501..8a6648c33eec5 100644 --- a/src/osd/PGLog.cc +++ b/src/osd/PGLog.cc @@ -50,14 +50,9 @@ void PGLog::IndexedLog::trim( set* trimmed_dups, eversion_t *write_from_dups) { - if (complete_to != log.end() && - complete_to->version <= s) { - generic_dout(0) << " bad trim to " << s << " when complete_to is " - << complete_to->version - << " on " << *this << dendl; - } - assert(s <= can_rollback_to); + if (complete_to != log.end()) + lgeneric_subdout(cct, osd, 20) << " complete_to " << complete_to->version << dendl; auto earliest_dup_version = log.rbegin()->version.version < cct->_conf->osd_pg_log_dups_tracked @@ -68,17 +63,17 @@ void PGLog::IndexedLog::trim( const pg_log_entry_t &e = *log.begin(); if (e.version > s) break; - generic_dout(20) << "trim " << e << dendl; + lgeneric_subdout(cct, osd, 20) << "trim " << e << dendl; if (trimmed) trimmed->insert(e.version); unindex(e); // remove from index, // add to dup list - generic_dout(20) << "earliest_dup_version = " << earliest_dup_version << dendl; + lgeneric_subdout(cct, osd, 20) << "earliest_dup_version = " << earliest_dup_version << dendl; if (e.version.version >= earliest_dup_version) { if (write_from_dups != nullptr && *write_from_dups > e.version) { - generic_dout(20) << "updating write_from_dups from " << *write_from_dups << " to " << e.version << dendl; + lgeneric_subdout(cct, osd, 20) << "updating write_from_dups from " << *write_from_dups << " to " << e.version << dendl; *write_from_dups = e.version; } dups.push_back(pg_log_dup_t(e)); @@ -91,6 +86,10 @@ void PGLog::IndexedLog::trim( } } + bool reset_complete_to = false; + // we are trimming past complete_to, so reset complete_to + if (complete_to != log.end() && e.version >= complete_to->version) + reset_complete_to = true; if (rollback_info_trimmed_to_riter == log.rend() || e.version == rollback_info_trimmed_to_riter->version) { log.pop_front(); @@ -98,13 +97,20 @@ void PGLog::IndexedLog::trim( } else { log.pop_front(); } + + // reset complete_to to the beginning of the log + if (reset_complete_to) { + lgeneric_subdout(cct, osd, 20) << " moving complete_to " << " to " + << log.begin()->version << dendl; + complete_to = log.begin(); + } } while (!dups.empty()) { const auto& e = *dups.begin(); if (e.version.version >= earliest_dup_version) break; - generic_dout(20) << "trim dup " << e << dendl; + lgeneric_subdout(cct, osd, 20) << "trim dup " << e << dendl; if (trimmed_dups) trimmed_dups->insert(e.get_key_name()); if (indexed_data & PGLOG_INDEXED_DUPS) { @@ -162,16 +168,23 @@ void PGLog::clear_info_log( void PGLog::trim( eversion_t trim_to, - pg_info_t &info) + pg_info_t &info, + bool transaction_applied) { + dout(10) << __func__ << " proposed trim_to = " << trim_to << dendl; // trim? if (trim_to > log.tail) { - // We shouldn't be trimming the log past last_complete - assert(trim_to <= info.last_complete); + dout(10) << __func__ << " missing = " << missing.num_missing() << dendl; + // Don't assert for backfill_targets + // or whenever there are missing items + if (transaction_applied && (missing.num_missing() == 0)) + assert(trim_to <= info.last_complete); dout(10) << "trim " << log << " to " << trim_to << dendl; log.trim(cct, trim_to, &trimmed, &trimmed_dups, &write_from_dups); info.log_tail = log.tail; + if (log.complete_to != log.log.end()) + dout(10) << " after trim complete_to " << log.complete_to->version << dendl; } } diff --git a/src/osd/PGLog.h b/src/osd/PGLog.h index 7253936ddcfa6..6f85ee1f1a550 100644 --- a/src/osd/PGLog.h +++ b/src/osd/PGLog.h @@ -705,7 +705,8 @@ struct PGLog : DoutPrefixProvider { void trim( eversion_t trim_to, - pg_info_t &info); + pg_info_t &info, + bool transaction_applied = true); void roll_forward_to( eversion_t roll_forward_to, diff --git a/src/osd/PrimaryLogPG.cc b/src/osd/PrimaryLogPG.cc index aaf9136a45e67..aa99e398ae32d 100644 --- a/src/osd/PrimaryLogPG.cc +++ b/src/osd/PrimaryLogPG.cc @@ -1567,15 +1567,20 @@ void PrimaryLogPG::calc_trim_to() PG_STATE_BACKFILL_TOOFULL)) { target = cct->_conf->osd_max_pg_log_entries; } - - eversion_t limit = MIN( - min_last_complete_ondisk, + // limit pg log trimming up to the can_rollback_to value + eversion_t limit = std::min( + pg_log.get_head(), pg_log.get_can_rollback_to()); + dout(10) << __func__ << " limit = " << limit << dendl; + if (limit != eversion_t() && limit != pg_trim_to && pg_log.get_log().approx_size() > target) { - size_t num_to_trim = MIN(pg_log.get_log().approx_size() - target, - cct->_conf->osd_pg_log_trim_max); + dout(10) << __func__ << " approx pg log length = " + << pg_log.get_log().approx_size() << dendl; + size_t num_to_trim = std::min(pg_log.get_log().approx_size() - target, + cct->_conf->osd_pg_log_trim_max); + dout(10) << __func__ << " num_to_trim = " << num_to_trim << dendl; if (num_to_trim < cct->_conf->osd_pg_log_trim_min && cct->_conf->osd_pg_log_trim_max >= cct->_conf->osd_pg_log_trim_min) { return; @@ -1585,16 +1590,15 @@ void PrimaryLogPG::calc_trim_to() for (size_t i = 0; i < num_to_trim; ++i) { new_trim_to = it->version; ++it; - if (new_trim_to > limit) { + if (new_trim_to >= limit) { new_trim_to = limit; - dout(10) << "calc_trim_to trimming to min_last_complete_ondisk" << dendl; + dout(10) << "calc_trim_to trimming to limit: " << limit << dendl; break; } } dout(10) << "calc_trim_to " << pg_trim_to << " -> " << new_trim_to << dendl; pg_trim_to = new_trim_to; assert(pg_trim_to <= pg_log.get_head()); - assert(pg_trim_to <= min_last_complete_ondisk); } }