Skip to content

Commit

Permalink
Fix master timeout during handshake
Browse files Browse the repository at this point in the history
This change allows a slave to properly time out a dead master during
the extended asynchronous synchronization state machine.  Now, slaves
will record their last interaction with the master and apply the
replication timeout before a response to the PSYNC request is received.
  • Loading branch information
kevinmcgehee committed Oct 14, 2015
1 parent 560142e commit 3e0b34c
Showing 1 changed file with 19 additions and 3 deletions.
22 changes: 19 additions & 3 deletions src/replication.c
Expand Up @@ -41,6 +41,7 @@ void replicationDiscardCachedMaster(void);
void replicationResurrectCachedMaster(int newfd);
void replicationSendAck(void);
void putSlaveOnline(redisClient *slave);
int serverInHandshakeState(int repl_state);

/* --------------------------- Utility functions ---------------------------- */

Expand Down Expand Up @@ -1190,6 +1191,7 @@ char *sendSynchronousCommand(int flags, int fd, ...) {
return sdscatprintf(sdsempty(),"-Reading from master: %s",
strerror(errno));
}
server.repl_transfer_lastio = server.unixtime;
return sdsnew(buf);
}
return NULL;
Expand Down Expand Up @@ -1619,7 +1621,7 @@ void undoConnectWithMaster(void) {
int fd = server.repl_transfer_s;

redisAssert(server.repl_state == REDIS_REPL_CONNECTING ||
server.repl_state == REDIS_REPL_RECEIVE_PONG);
serverInHandshakeState(server.repl_state));
aeDeleteFileEvent(server.el,fd,AE_READABLE|AE_WRITABLE);
close(fd);
server.repl_transfer_s = -1;
Expand All @@ -1638,7 +1640,7 @@ int cancelReplicationHandshake(void) {
if (server.repl_state == REDIS_REPL_TRANSFER) {
replicationAbortSyncTransfer();
} else if (server.repl_state == REDIS_REPL_CONNECTING ||
server.repl_state == REDIS_REPL_RECEIVE_PONG)
serverInHandshakeState(server.repl_state))
{
undoConnectWithMaster();
} else {
Expand Down Expand Up @@ -1810,6 +1812,20 @@ void roleCommand(redisClient *c) {
}
}

/* Tell whether 'repl_state' is one of the replication handshake states
 * enumerated in the switch below.
 *
 * Returns 1 when it is, 0 for any other value. Note that
 * REDIS_REPL_CONNECTING is not part of this set: callers that care about
 * it compare against it explicitly. */
int serverInHandshakeState(int repl_state) {
    switch (repl_state) {
    case REDIS_REPL_RECEIVE_PONG:
    case REDIS_REPL_SEND_AUTH:
    case REDIS_REPL_RECEIVE_AUTH:
    case REDIS_REPL_SEND_PORT:
    case REDIS_REPL_RECEIVE_PORT:
    case REDIS_REPL_SEND_CAPA:
    case REDIS_REPL_RECEIVE_CAPA:
    case REDIS_REPL_SEND_PSYNC:
    case REDIS_REPL_RECEIVE_PSYNC:
        return 1;
    default:
        return 0;
    }
}

/* Send a REPLCONF ACK command to the master to inform it about the current
* processed offset. If we are not connected with a master, the command has
* no effects. */
Expand Down Expand Up @@ -2045,7 +2061,7 @@ void replicationCron(void) {
/* Non blocking connection timeout? */
if (server.masterhost &&
(server.repl_state == REDIS_REPL_CONNECTING ||
server.repl_state == REDIS_REPL_RECEIVE_PONG) &&
serverInHandshakeState(server.repl_state)) &&
(time(NULL)-server.repl_transfer_lastio) > server.repl_timeout)
{
redisLog(REDIS_WARNING,"Timeout connecting to the MASTER...");
Expand Down

0 comments on commit 3e0b34c

Please sign in to comment.