Skip to content

Commit

Permalink
afr: event gen changes
Browse files Browse the repository at this point in the history
The general idea of the changes is to prevent resetting event generation
to zero in the inode ctx, since event gen is something that should
follow 'causal order'.

Change #1:
For a read txn, in inode refresh cbk, if event_generation is
found zero, we are failing the read fop. This is not needed
because change in event gen is only a marker for the next inode refresh to
happen and should not be taken into account by the current read txn.

Change #2:
The event gen being zero above can happen if there is a racing lookup,
which resets even get (in afr_lookup_done) if there are non zero afr
xattrs. The resetting is done only to trigger an inode refresh and a
possible client side heal on the next lookup. That can be acheived by
setting the need_refresh flag in the inode ctx. So replaced all
occurences of resetting even gen to zero with a call to
afr_inode_need_refresh_set().

Change #3:
In both lookup and discover path, we are doing an inode refresh which is
not required since all 3 essentially do the same thing- update the inode
ctx with the good/bad copies from the brick replies. Inode refresh also
triggers background heals, but I think it is okay to do it when we call
refresh during the read and write txns and not in the lookup path.

The .ts which relied on inode refresh in lookup path to trigger heals are
now changed to do read txn so that inode refresh and the heal happens.

Change-Id: Iebf39a9be6ffd7ffd6e4046c96b0fa78ade6c5ec
Fixes: #1179
Signed-off-by: Ravishankar N <ravishankar@redhat.com>
Reported-by: Erik Jacobson <erik.jacobson at hpe.com>
  • Loading branch information
itisravi authored and amarts committed Jun 22, 2020
1 parent 42172e9 commit 336ac3a
Show file tree
Hide file tree
Showing 4 changed files with 29 additions and 82 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -168,8 +168,8 @@ TEST [ "$gfid_1" != "$gfid_2" ]
#We know that second brick has the bigger size file
BIGGER_FILE_MD5=$(md5sum $B0/${V0}1/f3 | cut -d\ -f1)

TEST ls $M0/f3
TEST cat $M0/f3
TEST ls $M0 #Trigger entry heal via readdir inode refresh
TEST cat $M0/f3 #Trigger data heal via readv inode refresh
EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0

#gfid split-brain should be resolved
Expand Down Expand Up @@ -215,8 +215,8 @@ TEST $CLI volume start $V0 force
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 2

TEST ls $M0/f4
TEST cat $M0/f4
TEST ls $M0 #Trigger entry heal via readdir inode refresh
TEST cat $M0/f4 #Trigger data heal via readv inode refresh
EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0

#gfid split-brain should be resolved
Expand Down
92 changes: 18 additions & 74 deletions xlators/cluster/afr/src/afr-common.c
Original file line number Diff line number Diff line change
Expand Up @@ -885,7 +885,7 @@ __afr_set_in_flight_sb_status(xlator_t *this, afr_local_t *local,
metadatamap |= (1 << index);
}
if (metadatamap_old != metadatamap) {
event = 0;
__afr_inode_need_refresh_set(inode, this);
}
break;

Expand All @@ -898,7 +898,7 @@ __afr_set_in_flight_sb_status(xlator_t *this, afr_local_t *local,
datamap |= (1 << index);
}
if (datamap_old != datamap)
event = 0;
__afr_inode_need_refresh_set(inode, this);
break;

default:
Expand Down Expand Up @@ -1061,34 +1061,6 @@ __afr_inode_read_subvol_set_small(inode_t *inode, xlator_t *this,
return ret;
}

int
__afr_inode_event_gen_reset_small(inode_t *inode, xlator_t *this)
{
int ret = -1;
uint16_t datamap = 0;
uint16_t metadatamap = 0;
uint32_t event = 0;
uint64_t val = 0;
afr_inode_ctx_t *ctx = NULL;

ret = __afr_inode_ctx_get(this, inode, &ctx);
if (ret)
return ret;

val = ctx->read_subvol;

metadatamap = (val & 0x000000000000ffff) >> 0;
datamap = (val & 0x00000000ffff0000) >> 16;
event = 0;

val = ((uint64_t)metadatamap) | (((uint64_t)datamap) << 16) |
(((uint64_t)event) << 32);

ctx->read_subvol = val;

return ret;
}

int
__afr_inode_read_subvol_get(inode_t *inode, xlator_t *this, unsigned char *data,
unsigned char *metadata, int *event_p)
Expand Down Expand Up @@ -1159,22 +1131,6 @@ __afr_inode_split_brain_choice_set(inode_t *inode, xlator_t *this,
return ret;
}

int
__afr_inode_event_gen_reset(inode_t *inode, xlator_t *this)
{
afr_private_t *priv = NULL;
int ret = -1;

priv = this->private;

if (priv->child_count <= 16)
ret = __afr_inode_event_gen_reset_small(inode, this);
else
ret = -1;

return ret;
}

int
afr_inode_read_subvol_get(inode_t *inode, xlator_t *this, unsigned char *data,
unsigned char *metadata, int *event_p)
Expand Down Expand Up @@ -1324,38 +1280,30 @@ afr_is_inode_refresh_reqd(inode_t *inode, xlator_t *this, int event_gen1,
return need_refresh;
}

static int
afr_inode_need_refresh_set(inode_t *inode, xlator_t *this)
int
__afr_inode_need_refresh_set(inode_t *inode, xlator_t *this)
{
int ret = -1;
afr_inode_ctx_t *ctx = NULL;

GF_VALIDATE_OR_GOTO(this->name, inode, out);

LOCK(&inode->lock);
{
ret = __afr_inode_ctx_get(this, inode, &ctx);
if (ret)
goto unlock;

ret = __afr_inode_ctx_get(this, inode, &ctx);
if (ret == 0) {
ctx->need_refresh = _gf_true;
}
unlock:
UNLOCK(&inode->lock);
out:

return ret;
}

int
afr_inode_event_gen_reset(inode_t *inode, xlator_t *this)
afr_inode_need_refresh_set(inode_t *inode, xlator_t *this)
{
int ret = -1;

GF_VALIDATE_OR_GOTO(this->name, inode, out);

LOCK(&inode->lock);
{
ret = __afr_inode_event_gen_reset(inode, this);
ret = __afr_inode_need_refresh_set(inode, this);
}
UNLOCK(&inode->lock);
out:
Expand Down Expand Up @@ -1790,7 +1738,7 @@ afr_txn_refresh_done(call_frame_t *frame, xlator_t *this, int err)
ret = afr_inode_get_readable(frame, inode, this, local->readable,
&event_generation, local->transaction.type);

if (ret == -EIO || (local->is_read_txn && !event_generation)) {
if (ret == -EIO) {
/* No readable subvolume even after refresh ==> splitbrain.*/
if (!priv->fav_child_policy) {
err = EIO;
Expand Down Expand Up @@ -3050,7 +2998,7 @@ afr_lookup_done(call_frame_t *frame, xlator_t *this)
if (read_subvol == -1)
goto cant_interpret;
if (ret) {
afr_inode_event_gen_reset(local->inode, this);
afr_inode_need_refresh_set(local->inode, this);
dict_del_sizen(local->replies[read_subvol].xdata, GF_CONTENT_KEY);
}
} else {
Expand Down Expand Up @@ -3606,6 +3554,7 @@ afr_discover_unwind(call_frame_t *frame, xlator_t *this)
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
int read_subvol = -1;
int ret = 0;
unsigned char *data_readable = NULL;
unsigned char *success_replies = NULL;

Expand All @@ -3627,7 +3576,10 @@ afr_discover_unwind(call_frame_t *frame, xlator_t *this)
if (!afr_has_quorum(success_replies, this, frame))
goto unwind;

afr_replies_interpret(frame, this, local->inode, NULL);
ret = afr_replies_interpret(frame, this, local->inode, NULL);
if (ret) {
afr_inode_need_refresh_set(local->inode, this);
}

read_subvol = afr_read_subvol_decide(local->inode, this, NULL,
data_readable);
Expand Down Expand Up @@ -3888,11 +3840,7 @@ afr_discover(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
afr_read_subvol_get(loc->inode, this, NULL, NULL, &event,
AFR_DATA_TRANSACTION, NULL);

if (afr_is_inode_refresh_reqd(loc->inode, this, event,
local->event_generation))
afr_inode_refresh(frame, this, loc->inode, NULL, afr_discover_do);
else
afr_discover_do(frame, this, 0);
afr_discover_do(frame, this, 0);

return 0;
out:
Expand Down Expand Up @@ -4033,11 +3981,7 @@ afr_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
afr_read_subvol_get(loc->parent, this, NULL, NULL, &event,
AFR_DATA_TRANSACTION, NULL);

if (afr_is_inode_refresh_reqd(loc->inode, this, event,
local->event_generation))
afr_inode_refresh(frame, this, loc->parent, NULL, afr_lookup_do);
else
afr_lookup_do(frame, this, 0);
afr_lookup_do(frame, this, 0);

return 0;
out:
Expand Down
6 changes: 3 additions & 3 deletions xlators/cluster/afr/src/afr-dir-write.c
Original file line number Diff line number Diff line change
Expand Up @@ -119,11 +119,11 @@ __afr_dir_write_finalize(call_frame_t *frame, xlator_t *this)
continue;
if (local->replies[i].op_ret < 0) {
if (local->inode)
afr_inode_event_gen_reset(local->inode, this);
afr_inode_need_refresh_set(local->inode, this);
if (local->parent)
afr_inode_event_gen_reset(local->parent, this);
afr_inode_need_refresh_set(local->parent, this);
if (local->parent2)
afr_inode_event_gen_reset(local->parent2, this);
afr_inode_need_refresh_set(local->parent2, this);
continue;
}

Expand Down
5 changes: 4 additions & 1 deletion xlators/cluster/afr/src/afr.h
Original file line number Diff line number Diff line change
Expand Up @@ -997,7 +997,10 @@ afr_inode_read_subvol_set(inode_t *inode, xlator_t *this,
int event_generation);

int
afr_inode_event_gen_reset(inode_t *inode, xlator_t *this);
__afr_inode_need_refresh_set(inode_t *inode, xlator_t *this);

int
afr_inode_need_refresh_set(inode_t *inode, xlator_t *this);

int
afr_read_subvol_select_by_policy(inode_t *inode, xlator_t *this,
Expand Down

0 comments on commit 336ac3a

Please sign in to comment.