Skip to content

Commit

Permalink
director: Fix user move hangs when another move is triggered early.
Browse files Browse the repository at this point in the history
Running "doveadm director flush" before the previous flush had fully
finished could have caused users to hang until their move timed out:

director: Error: Finishing user 3249070169 move timed out, its state may now be inconsistent (state=waiting-for-everyone)
  • Loading branch information
sirainen authored and GitLab committed Oct 25, 2016
1 parent f9c7655 commit fd882f7
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 12 deletions.
3 changes: 1 addition & 2 deletions src/director/director-connection.c
Expand Up @@ -549,8 +549,7 @@ director_user_refresh(struct director_connection *conn,
"replacing host %s with %s", username_hash,
net_ip2addr(&user->host->ip), net_ip2addr(&host->ip));
ret = TRUE;
} else if (user->kill_state != USER_KILL_STATE_NONE &&
user->kill_state < USER_KILL_STATE_DELAY) {
} else if (user->kill_state != USER_KILL_STATE_NONE) {
/* user is still being moved - ignore conflicting host updates
from other directors who don't yet know about the move. */
dir_debug("user refresh: %u is being moved, "
Expand Down
36 changes: 26 additions & 10 deletions src/director/director.c
Expand Up @@ -1090,6 +1090,22 @@ void director_kick_user_hash(struct director *dir, struct director_host *src,
director_update_send_version(dir, src, DIRECTOR_VERSION_USER_KICK, cmd);
}

/* Send a USER-KILLED-EVERYWHERE command for username_hash through
   director_update_send(), using src as the sending host argument.

   The command carries the IP, port and last_seq of the originating host.
   If orig_src is NULL, dir->self_host is used as the originator and its
   last_seq is incremented before the command is built. */
static void
director_send_user_killed_everywhere(struct director *dir,
				     struct director_host *src,
				     struct director_host *orig_src,
				     unsigned int username_hash)
{
	struct director_host *origin = orig_src;

	if (origin == NULL) {
		/* no originator given: attribute the command to ourselves
		   and bump our own sequence number */
		origin = dir->self_host;
		origin->last_seq++;
	}

	director_update_send(dir, src,
		t_strdup_printf("USER-KILLED-EVERYWHERE\t%s\t%u\t%u\t%u\n",
				net_ip2addr(&origin->ip), origin->port,
				origin->last_seq, username_hash));
}

void director_user_killed(struct director *dir, unsigned int username_hash)
{
struct user *user;
Expand All @@ -1105,10 +1121,18 @@ void director_user_killed(struct director *dir, unsigned int username_hash)
case USER_KILL_STATE_KILLED_WAITING_FOR_NOTIFY:
director_finish_user_kill(dir, user, TRUE);
break;
case USER_KILL_STATE_KILLING_NOTIFY_RECEIVED:
dir_debug("User %u kill_state=%s - ignoring USER-KILLED",
username_hash, user_kill_state_names[user->kill_state]);
break;
case USER_KILL_STATE_NONE:
case USER_KILL_STATE_FLUSHING:
case USER_KILL_STATE_DELAY:
case USER_KILL_STATE_KILLING_NOTIFY_RECEIVED:
/* move restarted. state=none can also happen if USER-MOVE was
sent while we were still moving. send back
USER-KILLED-EVERYWHERE to avoid hangs. */
director_send_user_killed_everywhere(dir, dir->self_host, NULL,
username_hash);
break;
case USER_KILL_STATE_KILLED_WAITING_FOR_EVERYONE:
director_user_killed_everywhere(dir, dir->self_host,
Expand All @@ -1130,15 +1154,7 @@ void director_user_killed_everywhere(struct director *dir,
return;

director_flush_user(dir, user);

if (orig_src == NULL) {
orig_src = dir->self_host;
orig_src->last_seq++;
}
director_update_send(dir, src, t_strdup_printf(
"USER-KILLED-EVERYWHERE\t%s\t%u\t%u\t%u\n",
net_ip2addr(&orig_src->ip), orig_src->port, orig_src->last_seq,
user->username_hash));
director_send_user_killed_everywhere(dir, src, orig_src, username_hash);
}

static void director_state_callback_timeout(struct director *dir)
Expand Down

0 comments on commit fd882f7

Please sign in to comment.