Skip to content

Commit

Permalink
glusterd rebalance: handle the write failure properly
Browse files Browse the repository at this point in the history
Also, make sure the sizes are the same before renaming the target file
to the original file, thereby preventing possible data loss.

Change-Id: Ie88224ba62a4604f8c0149f84fa462abfbd6ad78
BUG: 3193
Reviewed-on: http://review.gluster.com/29
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Vijay Bellur <vijay@gluster.com>
  • Loading branch information
amarts authored and avati committed Aug 1, 2011
1 parent 12752fa commit a59fc48
Showing 1 changed file with 53 additions and 10 deletions.
63 changes: 53 additions & 10 deletions xlators/mgmt/glusterd/src/glusterd-rebalance.c
Expand Up @@ -154,9 +154,11 @@ gf_glusterd_rebalance_move_data (glusterd_volinfo_t *volinfo, const char *dir)
struct dirent *entry = NULL;
struct stat stbuf = {0,};
struct stat new_stbuf = {0,};
struct stat dst_stbuf = {0,};
char full_path[1024] = {0,};
char tmp_filename[1024] = {0,};
char value[16] = {0,};
char file_not_copied_fully = 0;

if (!volinfo->defrag)
goto out;
Expand Down Expand Up @@ -210,24 +212,25 @@ gf_glusterd_rebalance_move_data (glusterd_volinfo_t *volinfo, const char *dir)

while (1) {
ret = read (src_fd, defrag->databuf, 131072);
if (!ret || (ret < 0)) {
if (ret < 0) {
file_not_copied_fully = 1;
break;
}
/* If EOF is hit, then we get 'ret == 0' */
if (!ret)
break;

ret = write (dst_fd, defrag->databuf, ret);
if (ret < 0) {
file_not_copied_fully = 1;
break;
}
}

ret = lstat (full_path, &new_stbuf);
if (ret < 0) {
close (dst_fd);
close (src_fd);
continue;
}
/* No need to rebalance, if there is some
activity on source file */
if (new_stbuf.st_mtime != stbuf.st_mtime) {
if (file_not_copied_fully) {
gf_log (THIS->name, GF_LOG_WARNING,
"failed to copy the file fully : %s (%s)",
full_path, strerror (errno));
close (dst_fd);
close (src_fd);
continue;
Expand All @@ -254,6 +257,46 @@ gf_glusterd_rebalance_move_data (glusterd_volinfo_t *volinfo, const char *dir)
tmp_filename, strerror (errno));
}

ret = fstat (src_fd, &new_stbuf);
if (ret < 0) {
gf_log (THIS->name, GF_LOG_WARNING,
"failed to get stat: %s (%s)",
full_path, strerror (errno));
close (dst_fd);
close (src_fd);
continue;
}

ret = fstat (dst_fd, &dst_stbuf);
if (ret < 0) {
gf_log (THIS->name, GF_LOG_WARNING,
"failed to get stat on temp file: %s (%s)",
tmp_filename, strerror (errno));
close (dst_fd);
close (src_fd);
continue;
}

/* No need to rebalance, if there is some
activity on source file */
if (new_stbuf.st_mtime != stbuf.st_mtime) {
gf_log (THIS->name, GF_LOG_WARNING,
"file got changed after we started copying %s",
full_path);
close (dst_fd);
close (src_fd);
continue;
}

if (new_stbuf.st_size != dst_stbuf.st_size) {
gf_log (THIS->name, GF_LOG_WARNING,
"file sizes are not same : %s",
full_path);
close (dst_fd);
close (src_fd);
continue;
}

ret = rename (tmp_filename, full_path);
if (ret != -1) {
LOCK (&defrag->lock);
Expand Down

0 comments on commit a59fc48

Please sign in to comment.