Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
cluster/afr: post-op-delay support
post-op-delay introduces an artificial delay between the OP and
POST-OP-CHANGELOG phases of a write transaction to increase the
probability of changelog-piggyback and eager-locking to work
more efficiently.

Also enable eager-locking by default.

Change-Id: I865ca4b68512c44818719c7e388952f15d53e6c2
BUG: 836033
Signed-off-by: Anand Avati <avati@redhat.com>
Reviewed-on: http://review.gluster.com/3621
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Pranith Kumar Karampuri <pranithk@gluster.com>
  • Loading branch information
avati committed Jul 4, 2012
1 parent d90596a commit 5a3d849
Show file tree
Hide file tree
Showing 6 changed files with 175 additions and 2 deletions.
3 changes: 3 additions & 0 deletions xlators/cluster/afr/src/afr-common.c
Expand Up @@ -2400,6 +2400,7 @@ __afr_fd_ctx_set (xlator_t *this, fd_t *fd)
goto out;
}

pthread_mutex_init (&fd_ctx->delay_lock, NULL);
INIT_LIST_HEAD (&fd_ctx->paused_calls);
INIT_LIST_HEAD (&fd_ctx->entries);

Expand Down Expand Up @@ -2654,6 +2655,8 @@ afr_cleanup_fd_ctx (xlator_t *this, fd_t *fd)
if (fd_ctx->lock_acquired)
GF_FREE (fd_ctx->lock_acquired);

pthread_mutex_destroy (&fd_ctx->delay_lock);

GF_FREE (fd_ctx);
}

Expand Down
2 changes: 2 additions & 0 deletions xlators/cluster/afr/src/afr-lk-common.c
Expand Up @@ -1477,6 +1477,8 @@ afr_nonblocking_inodelk (call_frame_t *frame, xlator_t *this)
piggyback = 0;
local->transaction.eager_lock[i] = 1;

afr_set_delayed_post_op (frame, this);

LOCK (&local->fd->lock);
{
if (fd_ctx->lock_acquired[i]) {
Expand Down
147 changes: 146 additions & 1 deletion xlators/cluster/afr/src/afr-transaction.c
Expand Up @@ -11,6 +11,7 @@
#include "dict.h"
#include "byte-order.h"
#include "common-utils.h"
#include "timer.h"

#include "afr.h"
#include "afr-transaction.h"
Expand Down Expand Up @@ -523,7 +524,7 @@ afr_set_postop_dict (afr_local_t *local, xlator_t *this, dict_t *xattr,
}

int
afr_changelog_post_op (call_frame_t *frame, xlator_t *this)
afr_changelog_post_op_now (call_frame_t *frame, xlator_t *this)
{
afr_private_t * priv = this->private;
afr_internal_lock_t *int_lock = NULL;
Expand Down Expand Up @@ -877,6 +878,8 @@ afr_changelog_pre_op (call_frame_t *frame, xlator_t *this)
}
UNLOCK (&local->fd->lock);

afr_set_delayed_post_op (frame, this);

if (piggyback)
afr_changelog_pre_op_cbk (frame, (void *)(long)i,
this, 1, 0, xattr[i],
Expand Down Expand Up @@ -1243,16 +1246,158 @@ afr_internal_lock_finish (call_frame_t *frame, xlator_t *this)
}


void
afr_set_delayed_post_op (call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;

/* call this function from any of the related optimizations
which benefit from delaying post op are enabled, namely:
- changelog piggybacking
- eager locking
*/

priv = this->private;
if (!priv)
return;

if (!priv->post_op_delay_secs)
return;

local = frame->local;
if (!local)
return;

if (!local->fd)
return;

if (local->op == GF_FOP_WRITE)
local->delayed_post_op = _gf_true;
}


gf_boolean_t
is_afr_delayed_changelog_post_op_needed (call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
gf_boolean_t res = _gf_false;

local = frame->local;
if (!local)
goto out;

if (!local->delayed_post_op)
goto out;

res = _gf_true;
out:
return res;
}


void
afr_delayed_changelog_post_op (xlator_t *this, call_frame_t *frame, fd_t *fd);

void
afr_delayed_changelog_wake_up (xlator_t *this, fd_t *fd);

void
afr_delayed_changelog_wake_up_cbk (void *data)
{
fd_t *fd = NULL;

fd = data;

afr_delayed_changelog_wake_up (THIS, fd);
}


void
afr_delayed_changelog_post_op (xlator_t *this, call_frame_t *frame, fd_t *fd)
{
afr_fd_ctx_t *fd_ctx = NULL;
call_frame_t *prev_frame = NULL;
struct timeval delta = {0, };
afr_private_t *priv = NULL;

priv = this->private;

fd_ctx = afr_fd_ctx_get (fd, this);
if (!fd_ctx)
return;

delta.tv_sec = priv->post_op_delay_secs;
delta.tv_usec = 0;

pthread_mutex_lock (&fd_ctx->delay_lock);
{
prev_frame = fd_ctx->delay_frame;
fd_ctx->delay_frame = NULL;
if (fd_ctx->delay_timer)
gf_timer_call_cancel (this->ctx, fd_ctx->delay_timer);
fd_ctx->delay_timer = NULL;
if (!frame)
goto unlock;
fd_ctx->delay_timer = gf_timer_call_after (this->ctx, delta,
afr_delayed_changelog_wake_up_cbk,
fd);
fd_ctx->delay_frame = frame;
}
unlock:
pthread_mutex_unlock (&fd_ctx->delay_lock);

if (prev_frame) {
afr_changelog_post_op_now (prev_frame, this);
}
}


void
afr_changelog_post_op (call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;

local = frame->local;

if (is_afr_delayed_changelog_post_op_needed (frame, this))
afr_delayed_changelog_post_op (this, frame, local->fd);
else
afr_changelog_post_op_now (frame, this);
}


void
afr_delayed_changelog_wake_up (xlator_t *this, fd_t *fd)
{
afr_delayed_changelog_post_op (this, NULL, fd);
}


int
afr_transaction_resume (call_frame_t *frame, xlator_t *this)
{
afr_internal_lock_t *int_lock = NULL;
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
fd_t *fd = NULL;

local = frame->local;
int_lock = &local->internal_lock;
priv = this->private;
fd = local->fd;

if (fd)
/* The wake up needs to happen independent of
what type of fop arrives here. If it was
a write, then it has already inherited the
lock and changelog. If it was not a write,
then the presumption of the optimization (of
optimizing for successive write operations)
fails.
*/
afr_delayed_changelog_wake_up (this, fd);

afr_restore_lk_owner (frame);

Expand Down
2 changes: 2 additions & 0 deletions xlators/cluster/afr/src/afr-transaction.h
Expand Up @@ -31,4 +31,6 @@ afr_fd_ctx_get (fd_t *fd, xlator_t *this);
int
afr_set_pending_dict (afr_private_t *priv, dict_t *xattr, int32_t **pending,
int child, afr_xattrop_type_t op);
void
afr_set_delayed_post_op (call_frame_t *frame, xlator_t *this);
#endif /* __TRANSACTION_H__ */
16 changes: 15 additions & 1 deletion xlators/cluster/afr/src/afr.c
Expand Up @@ -183,6 +183,9 @@ reconfigure (xlator_t *this, dict_t *options)
GF_OPTION_RECONF ("heal-timeout", priv->shd.timeout, options,
int32, out);

GF_OPTION_RECONF ("post-op-delay-secs", priv->post_op_delay_secs, options,
uint32, out);

/* Reset this so we re-discover in case the topology changed. */
priv->did_discovery = _gf_false;

Expand Down Expand Up @@ -324,6 +327,8 @@ init (xlator_t *this)
GF_OPTION_INIT ("quorum-count", priv->quorum_count, uint32, out);
fix_quorum_options(this,priv,qtype);

GF_OPTION_INIT ("post-op-delay-secs", priv->post_op_delay_secs, uint32, out);

priv->wait_count = 1;

priv->child_up = GF_CALLOC (sizeof (unsigned char), child_count,
Expand Down Expand Up @@ -617,7 +622,7 @@ struct volume_options options[] = {
},
{ .key = {"eager-lock"},
.type = GF_OPTION_TYPE_BOOL,
.default_value = "off",
.default_value = "on",
},
{ .key = {"self-heal-daemon"},
.type = GF_OPTION_TYPE_BOOL,
Expand Down Expand Up @@ -657,5 +662,14 @@ struct volume_options options[] = {
.default_value = "600",
.description = "Poll timeout for checking the need to self-heal"
},
{ .key = {"post-op-delay-secs"},
.type = GF_OPTION_TYPE_INT,
.min = 0,
.max = INT_MAX,
.default_value = "1",
.description = "Time interval induced artificially before "
"post-operation phase of the transaction to "
"enhance overlap of adjacent write operations.",
},
{ .key = {NULL} },
};
7 changes: 7 additions & 0 deletions xlators/cluster/afr/src/afr.h
Expand Up @@ -151,6 +151,7 @@ typedef struct _afr_private {
struct list_head saved_fds; /* list of fds on which locks have succeeded */
gf_boolean_t optimistic_change_log;
gf_boolean_t eager_lock;
uint32_t post_op_delay_secs;
unsigned int quorum_count;

char vol_uuid[UUID_SIZE + 1];
Expand Down Expand Up @@ -413,6 +414,7 @@ typedef struct _afr_local {

dict_t *dict;
int optimistic_change_log;
gf_boolean_t delayed_post_op;

gf_boolean_t fop_paused;
int (*fop_call_continue) (call_frame_t *frame, xlator_t *this);
Expand Down Expand Up @@ -733,6 +735,11 @@ typedef struct {

unsigned char *locked_on; /* which subvolumes locks have been successful */
struct list_head paused_calls; /* queued calls while fix_open happens */

/* used for delayed-post-op optimization */
pthread_mutex_t delay_lock;
gf_timer_t *delay_timer;
call_frame_t *delay_frame;
} afr_fd_ctx_t;


Expand Down

0 comments on commit 5a3d849

Please sign in to comment.