Skip to content

Commit

Permalink
osd/: unify PGBackend pull error pathways
Browse files: browse the repository at this point in the history
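This patch narrows the PGBackend -> PrimaryLogPG recovery
cancel/error interface to two callbacks: on_failed_pull and cancel_pull.
The separate failed_push, primary_failed, primary_error, on_primary_error,
backfill_add_missing and finish_degraded_object hooks are dropped from the
interface declared in PGBackend.h, and the ECBackend and ReplicatedBackend
error paths changed here now report through on_failed_pull.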

This patch requires careful review.

Signed-off-by: Samuel Just <sjust@redhat.com>
  • Loading branch information
athanatos committed May 1, 2019
1 parent 1a011fd commit 8a8947d
Show file tree
Hide file tree
Showing 5 changed files with 53 additions and 62 deletions.
8 changes: 3 additions & 5 deletions src/osd/ECBackend.cc
Original file line number Diff line number Diff line change
Expand Up @@ -220,13 +220,11 @@ void ECBackend::_failed_push(const hobject_t &hoid,
eversion_t v = recovery_ops[hoid].v;
recovery_ops.erase(hoid);

list<pg_shard_t> fl;
set<pg_shard_t> fl;
for (auto&& i : res.errors) {
fl.push_back(i.first);
fl.insert(i.first);
}
get_parent()->failed_push(fl, hoid);
get_parent()->backfill_add_missing(hoid, v);
get_parent()->finish_degraded_object(hoid);
get_parent()->on_failed_pull(fl, hoid, v);
}

struct OnRecoveryReadComplete :
Expand Down
38 changes: 18 additions & 20 deletions src/osd/PGBackend.h
Original file line number Diff line number Diff line change
Expand Up @@ -105,35 +105,33 @@ typedef std::shared_ptr<const OSDMap> OSDMapRef;
pg_shard_t peer,
const hobject_t oid) = 0;

virtual void failed_push(const list<pg_shard_t> &from,
const hobject_t &soid,
const eversion_t &need = eversion_t()) = 0;
virtual void finish_degraded_object(const hobject_t& oid) = 0;
virtual void primary_failed(const hobject_t &soid) = 0;
virtual bool primary_error(const hobject_t& soid, eversion_t v) = 0;
virtual void cancel_pull(const hobject_t &soid) = 0;

virtual void apply_stats(
const hobject_t &soid,
const object_stat_sum_t &delta_stats) = 0;

/**
* Called when a read on the primary fails when pushing
* Called when a read from a set of replicas/primary fails
*/
virtual void on_primary_error(
const hobject_t &oid,
eversion_t v
) = 0;

virtual void backfill_add_missing(
const hobject_t &oid,
eversion_t v
virtual void on_failed_pull(
const set<pg_shard_t> &from,
const hobject_t &soid,
const eversion_t &v
) = 0;

virtual void remove_missing_object(const hobject_t &oid,
eversion_t v,
Context *on_complete) = 0;
/**
* Called when a pull on soid cannot be completed due to
* down peers
*/
virtual void cancel_pull(
const hobject_t &soid) = 0;

/**
* Called to remove an object.
*/
virtual void remove_missing_object(
const hobject_t &oid,
eversion_t v,
Context *on_complete) = 0;

/**
* Bless a context
Expand Down
36 changes: 12 additions & 24 deletions src/osd/PrimaryLogPG.cc
Original file line number Diff line number Diff line change
Expand Up @@ -505,16 +505,6 @@ void PrimaryLogPG::send_message_osd_cluster(
osd->send_message_osd_cluster(m, con);
}

void PrimaryLogPG::on_primary_error(
const hobject_t &oid,
eversion_t v)
{
dout(0) << __func__ << ": oid " << oid << " version " << v << dendl;
primary_failed(oid);
primary_error(oid, v);
backfill_add_missing(oid, v);
}

void PrimaryLogPG::backfill_add_missing(
const hobject_t &oid,
eversion_t v)
Expand Down Expand Up @@ -11420,14 +11410,10 @@ void PrimaryLogPG::_applied_recovered_object_replica()
}
}

void PrimaryLogPG::primary_failed(const hobject_t &soid)
{
list<pg_shard_t> fl = { pg_whoami };
failed_push(fl, soid);
}

void PrimaryLogPG::failed_push(const list<pg_shard_t> &from,
const hobject_t &soid, const eversion_t &need)
void PrimaryLogPG::on_failed_pull(
const set<pg_shard_t> &from,
const hobject_t &soid,
const eversion_t &v)
{
dout(20) << __func__ << ": " << soid << dendl;
ceph_assert(recovering.count(soid));
Expand All @@ -11452,6 +11438,12 @@ void PrimaryLogPG::failed_push(const list<pg_shard_t> &from,
<< ", reps on " << missing_loc.get_locations(soid)
<< " unfound? " << missing_loc.is_unfound(soid) << dendl;
finish_recovery_op(soid); // close out this attempt,
finish_degraded_object(soid);

if (from.count(pg_whoami)) {
primary_error(soid, v);
backfill_add_missing(soid, v);
}
}

eversion_t PrimaryLogPG::pick_newest_available(const hobject_t& oid)
Expand Down Expand Up @@ -12558,8 +12550,7 @@ int PrimaryLogPG::prep_object_replica_pushes(
h);
if (r < 0) {
dout(0) << __func__ << " Error " << r << " on oid " << soid << dendl;
primary_failed(soid);
primary_error(soid, v);
on_failed_pull({ pg_whoami }, soid, v);
return 0;
}
return 1;
Expand Down Expand Up @@ -13115,10 +13106,7 @@ int PrimaryLogPG::prep_backfill_object_push(
h);
if (r < 0) {
dout(0) << __func__ << " Error " << r << " on oid " << oid << dendl;
primary_failed(oid);
primary_error(oid, v);
backfills_in_flight.erase(oid);
missing_loc.add_missing(oid, v, eversion_t());
on_failed_pull({ pg_whoami }, oid, v);
}
return r;
}
Expand Down
17 changes: 9 additions & 8 deletions src/osd/PrimaryLogPG.h
Original file line number Diff line number Diff line change
Expand Up @@ -294,17 +294,18 @@ class PrimaryLogPG : public PG, public PGBackend::Listener {
const hobject_t &oid,
const object_stat_sum_t &stat_diff,
bool is_delete) override;
void failed_push(const list<pg_shard_t> &from,
const hobject_t &soid,
const eversion_t &need = eversion_t()) override;
void primary_failed(const hobject_t &soid) override;
bool primary_error(const hobject_t& soid, eversion_t v) override;
void on_failed_pull(
const set<pg_shard_t> &from,
const hobject_t &soid,
const eversion_t &version) override;
void cancel_pull(const hobject_t &soid) override;
void apply_stats(
const hobject_t &soid,
const object_stat_sum_t &delta_stats) override;
void on_primary_error(const hobject_t &oid, eversion_t v) override;
void backfill_add_missing(const hobject_t &oid, eversion_t v) override;

bool primary_error(const hobject_t& soid, eversion_t v);

void backfill_add_missing(const hobject_t &oid, eversion_t v);
void remove_missing_object(const hobject_t &oid,
eversion_t v,
Context *on_complete) override;
Expand Down Expand Up @@ -1134,7 +1135,7 @@ class PrimaryLogPG : public PG, public PGBackend::Listener {
PGBackend::RecoveryHandle *h,
bool *work_started);

void finish_degraded_object(const hobject_t& oid) override;
void finish_degraded_object(const hobject_t& oid);

// Cancels/resets pulls from peer
void check_recovery_sources(const OSDMapRef& map) override ;
Expand Down
16 changes: 11 additions & 5 deletions src/osd/ReplicatedBackend.cc
Original file line number Diff line number Diff line change
Expand Up @@ -786,8 +786,9 @@ struct C_ReplicatedBackend_OnPullComplete : GenContext<ThreadPool::TPHandle&> {
int started = bc->start_pushes(i.hoid, obc, h);
if (started < 0) {
bc->pushing[i.hoid].clear();
bc->get_parent()->primary_failed(i.hoid);
bc->get_parent()->primary_error(i.hoid, obc->obs.oi.version);
bc->get_parent()->on_failed_pull(
{ bc->get_parent()->whoami_shard() },
i.hoid, obc->obs.oi.version);
} else if (!started) {
bc->get_parent()->on_global_recover(
i.hoid, i.stat, false);
Expand Down Expand Up @@ -2103,7 +2104,10 @@ bool ReplicatedBackend::handle_push_reply(
if (!error)
get_parent()->on_global_recover(soid, stat, false);
else
get_parent()->on_primary_error(soid, v);
get_parent()->on_failed_pull(
std::set<pg_shard_t>{ get_parent()->whoami_shard() },
soid,
v);
pushing.erase(soid);
} else {
// This looks weird, but we erased the current peer and need to remember
Expand Down Expand Up @@ -2193,10 +2197,12 @@ void ReplicatedBackend::trim_pushed_data(
void ReplicatedBackend::_failed_pull(pg_shard_t from, const hobject_t &soid)
{
dout(20) << __func__ << ": " << soid << " from " << from << dendl;
list<pg_shard_t> fl = { from };
auto it = pulling.find(soid);
assert(it != pulling.end());
get_parent()->failed_push(fl, soid, it->second.recovery_info.version);
get_parent()->on_failed_pull(
{ from },
soid,
it->second.recovery_info.version);

clear_pull(it);
}
Expand Down

0 comments on commit 8a8947d

Please sign in to comment.