Skip to content

Commit

Permalink
Merge pull request #5287: PGLog::proc_replica_log: correctly handle c…
Browse files Browse the repository at this point in the history
…ase where entries between olog.head and log.tail were split out

Reviewed-by: Loic Dachary <ldachary@redhat.com>
  • Loading branch information
ldachary committed Oct 21, 2015
2 parents 7e30d19 + 8c02376 commit d96af1d
Show file tree
Hide file tree
Showing 2 changed files with 95 additions and 43 deletions.
54 changes: 36 additions & 18 deletions src/osd/PGLog.cc
Expand Up @@ -185,6 +185,18 @@ void PGLog::proc_replica_log(
dout(10) << "proc_replica_log for osd." << from << ": "
<< oinfo << " " << olog << " " << omissing << dendl;

if (olog.head < log.tail) {
dout(10) << __func__ << ": osd." << from << " does not overlap, not looking "
<< "for divergent objects" << dendl;
return;
}
if (olog.head == log.head) {
dout(10) << __func__ << ": osd." << from << " same log head, not looking "
<< "for divergent objects" << dendl;
return;
}
assert(olog.head >= log.tail);

/*
basically what we're doing here is rewinding the remote log,
dropping divergent entries, until we find something that matches
Expand All @@ -202,48 +214,54 @@ void PGLog::proc_replica_log(
<< " have " << i->second.have << dendl;
}

list<pg_log_entry_t>::const_iterator fromiter = log.log.end();
eversion_t lower_bound = log.tail;
list<pg_log_entry_t>::const_reverse_iterator first_non_divergent =
log.log.rbegin();
while (1) {
if (fromiter == log.log.begin())
if (first_non_divergent == log.log.rend())
break;
--fromiter;
if (fromiter->version <= olog.head) {
dout(20) << "merge_log cut point (usually last shared) is "
<< *fromiter << dendl;
lower_bound = fromiter->version;
++fromiter;
if (first_non_divergent->version <= olog.head) {
dout(20) << "merge_log point (usually last shared) is "
<< *first_non_divergent << dendl;
break;
}
++first_non_divergent;
}

/* Because olog.head >= log.tail, we know that both pgs must at least have
* the event represented by log.tail. Thus, lower_bound >= log.tail. It's
* possible that olog/log contain no actual events between olog.head and
* log.tail, however, since they might have been split out. Thus, if
* we cannot find an event e such that log.tail <= e.version <= log.head,
* the last_update must actually be log.tail.
*/
eversion_t lu =
(first_non_divergent == log.log.rend() ||
first_non_divergent->version < log.tail) ?
log.tail :
first_non_divergent->version;

list<pg_log_entry_t> divergent;
list<pg_log_entry_t>::const_iterator pp = olog.log.end();
eversion_t lu(oinfo.last_update);
while (true) {
if (pp == olog.log.begin()) {
if (pp != olog.log.end()) // no last_update adjustment if we discard nothing!
lu = olog.tail;
if (pp == olog.log.begin())
break;
}

--pp;
const pg_log_entry_t& oe = *pp;

// don't continue past the tail of our log.
if (oe.version <= log.tail) {
lu = oe.version;
++pp;
break;
}

if (oe.version <= lower_bound) {
lu = oe.version;
if (oe.version <= lu) {
++pp;
break;
}

divergent.push_front(oe);
}
}


IndexedLog folog;
Expand Down
84 changes: 59 additions & 25 deletions src/test/osd/TestPGLog.cc
Expand Up @@ -138,6 +138,14 @@ class PGLogTest : public ::testing::Test, protected PGLog {
fullauth.index();
fulldiv.index();
}
void set_div_bounds(eversion_t head, eversion_t tail) {
fulldiv.tail = divinfo.log_tail = tail;
fulldiv.head = divinfo.last_update = head;
}
void set_auth_bounds(eversion_t head, eversion_t tail) {
fullauth.tail = authinfo.log_tail = tail;
fullauth.head = authinfo.last_update = head;
}
const IndexedLog &get_fullauth() const { return fullauth; }
const IndexedLog &get_fulldiv() const { return fulldiv; }
const pg_info_t &get_authinfo() const { return authinfo; }
Expand Down Expand Up @@ -235,6 +243,8 @@ class PGLogTest : public ::testing::Test, protected PGLog {
proc_replica_log(
t, oinfo, olog, omissing, pg_shard_t(1, shard_id_t(0)));

assert(oinfo.last_update >= log.tail);

if (!tcase.base.empty()) {
ASSERT_EQ(tcase.base.rbegin()->version, oinfo.last_update);
}
Expand Down Expand Up @@ -1270,8 +1280,8 @@ TEST_F(PGLogTest, proc_replica_log) {
pg_shard_t from;

eversion_t last_update(1, 1);
oinfo.last_update = last_update;
eversion_t last_complete(2, 1);
log.head = olog.head = oinfo.last_update = last_update;
eversion_t last_complete(1, 1);
oinfo.last_complete = last_complete;

EXPECT_TRUE(t.empty());
Expand Down Expand Up @@ -1470,12 +1480,12 @@ TEST_F(PGLogTest, proc_replica_log) {
}

/* +--------------------------+
| log olog |
| olog log |
+--------+-------+---------+
| |object | |
|version | hash | version |
| | | |
tail > (1,1) | x5 | (1,1) < tail
tail > (1,1) | x9 | (1,1) < tail
| | | |
| | | |
| (1,2) | x3 | (1,2) |
Expand Down Expand Up @@ -1503,34 +1513,38 @@ TEST_F(PGLogTest, proc_replica_log) {
pg_shard_t from;

eversion_t last_update(1, 2);
hobject_t divergent_object;
divergent_object.hash = 0x9;

{
pg_log_entry_t e;
e.mod_desc.mark_unrollbackable();

e.version = eversion_t(1, 1);
e.soid.hash = 0x5;
e.soid = divergent_object;
log.tail = e.version;
log.log.push_back(e);
e.version = last_update;
e.soid.hash = 0x3;
log.log.push_back(e);
e.version = eversion_t(1,3);
e.soid.hash = 0x9;
e.version = eversion_t(2, 3);
e.prior_version = eversion_t(1, 1);
e.soid = divergent_object;
e.op = pg_log_entry_t::DELETE;
log.log.push_back(e);
log.head = e.version;
log.index();

e.version = eversion_t(1, 1);
e.soid.hash = 0x5;
e.soid = divergent_object;
olog.tail = e.version;
olog.log.push_back(e);
e.version = last_update;
e.soid.hash = 0x3;
olog.log.push_back(e);
e.version = eversion_t(2, 3);
e.soid.hash = 0x9;
e.version = eversion_t(1, 3);
e.prior_version = eversion_t(1, 1);
e.soid = divergent_object;
e.op = pg_log_entry_t::DELETE;
olog.log.push_back(e);
olog.head = e.version;
Expand All @@ -1547,28 +1561,30 @@ TEST_F(PGLogTest, proc_replica_log) {
proc_replica_log(t, oinfo, olog, omissing, from);

EXPECT_TRUE(t.empty());
EXPECT_FALSE(omissing.have_missing());
EXPECT_TRUE(omissing.have_missing());
EXPECT_TRUE(omissing.is_missing(divergent_object));
EXPECT_EQ(omissing.missing[divergent_object].have, eversion_t(0, 0));
EXPECT_EQ(omissing.missing[divergent_object].need, eversion_t(1, 1));
EXPECT_EQ(last_update, oinfo.last_update);
EXPECT_EQ(last_update, oinfo.last_complete);
}

/* +--------------------------+
| log olog |
| olog log |
+--------+-------+---------+
| |object | |
|version | hash | version |
| | | |
tail > (1,1) | x5 | (1,1) < tail
tail > (1,1) | x9 | (1,1) < tail
| | | |
| | | |
| (1,2) | x3 | (1,2) |
| | | |
| | | |
head > (1,3) | x9 | |
| DELETE | | |
| MODIFY | | |
| | | |
| | x9 | (2,3) < head
| | | MODIFY |
| | | DELETE |
| | | |
+--------+-------+---------+
Expand All @@ -1593,28 +1609,30 @@ TEST_F(PGLogTest, proc_replica_log) {
e.mod_desc.mark_unrollbackable();

e.version = eversion_t(1, 1);
e.soid.hash = 0x5;
e.soid = divergent_object;
log.tail = e.version;
log.log.push_back(e);
e.version = last_update;
e.soid.hash = 0x3;
log.log.push_back(e);
e.version = eversion_t(1, 3);
e.soid.hash = 0x9;
e.version = eversion_t(2, 3);
e.prior_version = eversion_t(1, 1);
e.soid = divergent_object;
e.op = pg_log_entry_t::DELETE;
log.log.push_back(e);
log.head = e.version;
log.index();

e.version = eversion_t(1, 1);
e.soid.hash = 0x5;
e.soid = divergent_object;
olog.tail = e.version;
olog.log.push_back(e);
e.version = last_update;
e.soid.hash = 0x3;
olog.log.push_back(e);
e.version = eversion_t(2, 3);
e.soid.hash = 0x9;
e.version = eversion_t(1, 3);
e.prior_version = eversion_t(1, 1);
e.soid = divergent_object;
divergent_object = e.soid;
omissing.add(divergent_object, e.version, eversion_t());
e.op = pg_log_entry_t::MODIFY;
Expand All @@ -1628,16 +1646,18 @@ TEST_F(PGLogTest, proc_replica_log) {
EXPECT_TRUE(t.empty());
EXPECT_TRUE(omissing.have_missing());
EXPECT_TRUE(omissing.is_missing(divergent_object));
EXPECT_EQ(eversion_t(2, 3), omissing.missing[divergent_object].need);
EXPECT_EQ(eversion_t(1, 3), omissing.missing[divergent_object].need);
EXPECT_EQ(olog.head, oinfo.last_update);
EXPECT_EQ(olog.head, oinfo.last_complete);

proc_replica_log(t, oinfo, olog, omissing, from);

EXPECT_TRUE(t.empty());
EXPECT_FALSE(omissing.have_missing());
EXPECT_TRUE(omissing.have_missing());
EXPECT_TRUE(omissing.is_missing(divergent_object));
EXPECT_EQ(omissing.missing[divergent_object].have, eversion_t(0, 0));
EXPECT_EQ(omissing.missing[divergent_object].need, eversion_t(1, 1));
EXPECT_EQ(last_update, oinfo.last_update);
EXPECT_EQ(last_update, oinfo.last_complete);
}

/* +--------------------------+
Expand Down Expand Up @@ -1862,6 +1882,20 @@ TEST_F(PGLogTest, merge_log_prior_version_have) {
run_test_case(t);
}

TEST_F(PGLogTest, merge_log_split_missing_entries_at_head) {
TestCase t;
t.auth.push_back(mk_ple_mod_rb(mk_obj(1), mk_evt(10, 100), mk_evt(8, 70)));
t.auth.push_back(mk_ple_mod_rb(mk_obj(1), mk_evt(15, 150), mk_evt(10, 100)));

t.div.push_back(mk_ple_mod(mk_obj(1), mk_evt(8, 70), mk_evt(8, 65)));

t.setup();
t.set_div_bounds(mk_evt(9, 79), mk_evt(8, 69));
t.set_auth_bounds(mk_evt(10, 160), mk_evt(9, 77));
t.final.add(mk_obj(1), mk_evt(15, 150), mk_evt(8, 70));
run_test_case(t);
}

int main(int argc, char **argv) {
vector<const char*> args;
argv_to_vec(argc, (const char **)argv, args);
Expand Down

0 comments on commit d96af1d

Please sign in to comment.