Skip to content

Commit

Permalink
features/shard: Fix vm corruption upon fix-layout
Browse files Browse the repository at this point in the history
Backport of:
> Change-Id: I8a2e97d91ba3275fbc7174a008c7234fa5295d36
> BUG: 1440051
> Reviewed on: https://review.gluster.org/17010
> (cherry-picked from commit 99c8c0b)

shard's writev implementation, as part of identifying the
presence of participant shards that aren't in memory,
first sends an MKNOD on these shards and, upon an EEXIST error,
looks up the shards before proceeding with the writes.

The VM corruption was caused when the following happened:
1. DHT had n subvolumes initially.
2. Upon add-brick + fix-layout, the layout of .shard changed
   although the existing shards under it were yet to be migrated
   to their new hashed subvolumes.
3. During this time, there were writes on the VM falling in regions
   of the file whose corresponding shards were already existing under
   .shard.
4. Sharding xl sent MKNOD on these shards, now creating them in their
   new hashed subvolumes although there already exist shard blocks for
   this region with valid data.
5. All subsequent writes were wound on these newly created copies.

The net outcome is that neither copy of the shard had the correct
data. This caused the affected VMs to be unbootable.

FIX:
For want of better alternatives in DHT, the fix changes shard fops to do
a LOOKUP before the MKNOD and, upon an EEXIST error, to perform another lookup.

Change-Id: I8a2e97d91ba3275fbc7174a008c7234fa5295d36
BUG: 1426508
RCA'd-by: Raghavendra Gowdappa <rgowdapp@redhat.com>
Reported-by: Mahdi Adnan <mahdi.adnan@outlook.com>
Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com>
Reviewed-on: https://review.gluster.org/17021
Smoke: Gluster Build System <jenkins@build.gluster.org>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
  • Loading branch information
KritikaDhananjay authored and ShyamsundarR committed Apr 13, 2017
1 parent 1d98b9b commit 6e3054b
Show file tree
Hide file tree
Showing 2 changed files with 96 additions and 59 deletions.
154 changes: 95 additions & 59 deletions xlators/features/shard/src/shard.c
Expand Up @@ -1693,11 +1693,30 @@ shard_common_lookup_shards_cbk (call_frame_t *frame, void *cookie,

if (op_ret < 0) {
/* Ignore absence of shards in the backend in truncate fop. */
if (((local->fop == GF_FOP_TRUNCATE) ||
(local->fop == GF_FOP_FTRUNCATE) ||
(local->fop == GF_FOP_RENAME) ||
(local->fop == GF_FOP_UNLINK)) && (op_errno == ENOENT))
goto done;
switch (local->fop) {
case GF_FOP_TRUNCATE:
case GF_FOP_FTRUNCATE:
case GF_FOP_RENAME:
case GF_FOP_UNLINK:
if (op_errno == ENOENT)
goto done;
break;
case GF_FOP_WRITE:
case GF_FOP_READ:
case GF_FOP_ZEROFILL:
case GF_FOP_DISCARD:
case GF_FOP_FALLOCATE:
if ((!local->first_lookup_done) &&
(op_errno == ENOENT)) {
local->create_count++;
goto done;
}
break;
default:
break;
}

/* else */
gf_msg (this->name, GF_LOG_ERROR, op_errno,
SHARD_MSG_LOOKUP_SHARD_FAILED, "Lookup on shard %d "
"failed. Base file gfid = %s", shard_block_num,
Expand All @@ -1714,6 +1733,8 @@ shard_common_lookup_shards_cbk (call_frame_t *frame, void *cookie,
done:
call_count = shard_call_count_return (frame);
if (call_count == 0) {
if (!local->first_lookup_done)
local->first_lookup_done = _gf_true;
if (local->op_ret < 0)
goto unwind;
else
Expand Down Expand Up @@ -3193,47 +3214,6 @@ shard_readv_do (call_frame_t *frame, xlator_t *this)
return 0;
}

/*
 * Continuation invoked once the shard lookups issued on behalf of a readv
 * have completed.
 *
 * If the accumulated result in the frame's shard_local_t is a failure
 * (op_ret < 0), the readv is unwound to the caller with that op_ret/op_errno
 * and no payload. Otherwise the read proceeds via shard_readv_do ().
 *
 * Always returns 0.
 */
int
shard_post_lookup_shards_readv_handler (call_frame_t *frame, xlator_t *this)
{
        shard_local_t *local = frame->local;

        if (local->op_ret >= 0) {
                /* Lookups succeeded: carry on with the read itself. */
                shard_readv_do (frame, this);
        } else {
                /* Propagate the recorded error; nothing was read. */
                SHARD_STACK_UNWIND (readv, frame, local->op_ret,
                                    local->op_errno, NULL, 0, NULL, NULL,
                                    NULL);
        }

        return 0;
}

/*
 * Continuation invoked once the MKNODs issued on behalf of a readv have
 * completed.
 *
 * - On failure (op_ret < 0) the readv is unwound with the recorded
 *   op_ret/op_errno and no payload.
 * - If eexist_count is non-zero (presumably the number of shards whose MKNOD
 *   hit EEXIST -- confirm against shard_common_mknod_cbk), that many lookups
 *   are issued and shard_post_lookup_shards_readv_handler () takes over.
 * - Otherwise the read proceeds directly via shard_readv_do ().
 *
 * Always returns 0.
 */
int
shard_post_mknod_readv_handler (call_frame_t *frame, xlator_t *this)
{
        shard_local_t *local = frame->local;

        if (local->op_ret < 0) {
                SHARD_STACK_UNWIND (readv, frame, local->op_ret,
                                    local->op_errno, NULL, 0, NULL, NULL,
                                    NULL);
                return 0;
        }

        if (local->eexist_count) {
                /* One lookup per shard counted in eexist_count. */
                local->call_count = local->eexist_count;
                shard_common_lookup_shards (frame, this, local->loc.inode,
                                            shard_post_lookup_shards_readv_handler);
                return 0;
        }

        shard_readv_do (frame, this);
        return 0;
}

int
shard_common_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
Expand Down Expand Up @@ -3264,6 +3244,7 @@ shard_common_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
call_count = shard_call_count_return (frame);
if (call_count == 0) {
SHARD_UNSET_ROOT_FS_ID (frame, local);
local->create_count = 0;
local->post_mknod_handler (frame, this);
}

Expand Down Expand Up @@ -3393,6 +3374,55 @@ shard_common_resume_mknod (call_frame_t *frame, xlator_t *this,
return 0;
}

/* Forward declaration: shard_post_lookup_shards_readv_handler () below passes
 * this function to shard_common_resume_mknod () ahead of its definition. */
int
shard_post_mknod_readv_handler (call_frame_t *frame, xlator_t *this);

/*
 * Continuation invoked once the shard lookups issued on behalf of a readv
 * have completed.
 *
 * - On failure (op_ret < 0) the readv is unwound with the recorded
 *   op_ret/op_errno and no payload.
 * - If create_count is non-zero, the MKNOD phase is started through
 *   shard_common_resume_mknod (), with shard_post_mknod_readv_handler () as
 *   its continuation.
 * - Otherwise the read proceeds directly via shard_readv_do ().
 *
 * Always returns 0.
 */
int
shard_post_lookup_shards_readv_handler (call_frame_t *frame, xlator_t *this)
{
        shard_local_t *local = frame->local;

        if (local->op_ret < 0) {
                SHARD_STACK_UNWIND (readv, frame, local->op_ret,
                                    local->op_errno, NULL, 0, NULL, NULL,
                                    NULL);
        } else if (local->create_count) {
                /* Some shards still have to be created before reading. */
                shard_common_resume_mknod (frame, this,
                                           shard_post_mknod_readv_handler);
        } else {
                shard_readv_do (frame, this);
        }

        return 0;
}

/*
 * Continuation invoked once the MKNODs issued on behalf of a readv have
 * completed.
 *
 * A failed result (op_ret < 0) unwinds the readv with the recorded
 * op_ret/op_errno and no payload. A non-zero eexist_count (presumably the
 * number of MKNODs that failed with EEXIST -- confirm against
 * shard_common_mknod_cbk) triggers that many shard lookups, resuming in
 * shard_post_lookup_shards_readv_handler (). With neither condition set the
 * read proceeds via shard_readv_do ().
 *
 * Always returns 0.
 */
int
shard_post_mknod_readv_handler (call_frame_t *frame, xlator_t *this)
{
        shard_local_t *local = frame->local;

        if (local->op_ret < 0) {
                SHARD_STACK_UNWIND (readv, frame, local->op_ret,
                                    local->op_errno, NULL, 0, NULL, NULL,
                                    NULL);
        } else if (local->eexist_count) {
                /* Look up exactly the shards counted in eexist_count. */
                local->call_count = local->eexist_count;
                shard_common_lookup_shards (frame, this, local->loc.inode,
                                            shard_post_lookup_shards_readv_handler);
        } else {
                shard_readv_do (frame, this);
        }

        return 0;
}

int
shard_post_resolve_readv_handler (call_frame_t *frame, xlator_t *this)
{
Expand All @@ -3419,9 +3449,9 @@ shard_post_resolve_readv_handler (call_frame_t *frame, xlator_t *this)
}

if (local->call_count) {
local->create_count = local->call_count;
shard_common_resume_mknod (frame, this,
shard_post_mknod_readv_handler);
shard_common_lookup_shards (frame, this,
local->resolver_base_inode,
shard_post_lookup_shards_readv_handler);
} else {
shard_readv_do (frame, this);
}
Expand Down Expand Up @@ -3573,14 +3603,11 @@ shard_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,

shard_lookup_base_file (frame, this, &local->loc,
shard_post_lookup_readv_handler);

return 0;

err:
SHARD_STACK_UNWIND (readv, frame, -1, ENOMEM, NULL, 0, NULL, NULL,
NULL);
return 0;

}

int
Expand Down Expand Up @@ -3872,6 +3899,10 @@ shard_common_inode_write_do (call_frame_t *frame, xlator_t *this)
return 0;
}

/* Forward declaration: shard_common_inode_write_post_lookup_shards_handler ()
 * below passes this function to shard_common_resume_mknod (), so its
 * prototype must be visible at that point. */
int
shard_common_inode_write_post_mknod_handler (call_frame_t *frame,
xlator_t *this);

int
shard_common_inode_write_post_lookup_shards_handler (call_frame_t *frame,
xlator_t *this)
Expand All @@ -3887,7 +3918,12 @@ shard_common_inode_write_post_lookup_shards_handler (call_frame_t *frame,
return 0;
}

shard_common_inode_write_do (frame, this);
if (local->create_count) {
shard_common_resume_mknod (frame, this,
shard_common_inode_write_post_mknod_handler);
} else {
shard_common_inode_write_do (frame, this);
}

return 0;
}
Expand Down Expand Up @@ -3935,11 +3971,13 @@ shard_common_inode_write_post_lookup_handler (call_frame_t *frame,

local->postbuf = local->prebuf;

if (local->create_count)
shard_common_resume_mknod (frame, this,
shard_common_inode_write_post_mknod_handler);
else
if (local->call_count) {
shard_common_lookup_shards (frame, this,
local->resolver_base_inode,
shard_common_inode_write_post_lookup_shards_handler);
} else {
shard_common_inode_write_do (frame, this);
}

return 0;
}
Expand All @@ -3959,8 +3997,6 @@ shard_common_inode_write_post_resolve_handler (call_frame_t *frame,
return 0;
}

local->create_count = local->call_count;

shard_lookup_base_file (frame, this, &local->loc,
shard_common_inode_write_post_lookup_handler);
return 0;
Expand Down
1 change: 1 addition & 0 deletions xlators/features/shard/src/shard.h
Expand Up @@ -255,6 +255,7 @@ typedef struct shard_local {
shard_lock_t *shard_lock;
} lock;
inode_t *resolver_base_inode;
gf_boolean_t first_lookup_done;
} shard_local_t;

typedef struct shard_inode_ctx {
Expand Down

0 comments on commit 6e3054b

Please sign in to comment.