Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP

Loading…

Adding feature: when all nodes reach the fail status and a slave wakes up, promote it to master #1374

Open
wants to merge 1 commit into from

2 participants

@charsyam

There are 3 nodes: A, B, and C.

A(M), B(S), C(S)
A fails, B Promoted
A(F), B(M), C(S)
B fails, C Promoted
A(F), B(F), C(M)
C fails
A(F), B(F), C(F)
If B or C then revives, Sentinel can't detect it:
A(F),B(F),C(S)

This patch promotes any slave that revives.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Commits on Nov 7, 2013
  1. @openbaas

    fix sentinel

    openbaas authored
This page is out of date. Refresh to see the latest.
Showing with 57 additions and 7 deletions.
  1. +1 −1  src/redis.c
  2. +56 −6 src/sentinel.c
View
2  src/redis.c
@@ -1393,7 +1393,7 @@ void initServerConfig() {
server.repl_syncio_timeout = REDIS_REPL_SYNCIO_TIMEOUT;
server.repl_serve_stale_data = REDIS_DEFAULT_SLAVE_SERVE_STALE_DATA;
server.repl_slave_ro = REDIS_DEFAULT_SLAVE_READ_ONLY;
- server.repl_down_since = 0; /* Never connected, repl is down since EVER. */
+ server.repl_down_since = mstime(); /* Never connected, repl is down since EVER. */
server.repl_disable_tcp_nodelay = REDIS_DEFAULT_REPL_DISABLE_TCP_NODELAY;
server.slave_priority = REDIS_DEFAULT_SLAVE_PRIORITY;
server.master_repl_offset = 0;
View
62 src/sentinel.c
@@ -1140,6 +1140,7 @@ int sentinelResetMasterAndChangeAddress(sentinelRedisInstance *master, char *ip,
newaddr = createSentinelAddr(ip,port);
if (newaddr == NULL) return REDIS_ERR;
sentinelResetMaster(master,SENTINEL_NO_FLAGS);
+
oldaddr = master->addr;
master->addr = newaddr;
master->o_down_since_time = 0;
@@ -1151,6 +1152,50 @@ int sentinelResetMasterAndChangeAddress(sentinelRedisInstance *master, char *ip,
return REDIS_OK;
}
+/* Re-point 'master' at the address it currently reports as its own master
+ * (master->slave_master_host / master->slave_master_port), while keeping
+ * track of the instances that were known before the switch:
+ *
+ * 1) The address 'master' had before the switch is registered as a slave.
+ * 2) Every entry of the previous slaves dict is registered again as a slave,
+ *    except the one whose address is the new master address.
+ *
+ * Returns REDIS_OK on success. Returns REDIS_ERR, leaving the previous
+ * slaves dict attached to 'master', if an address does not fit the local
+ * buffers, if no master host is known, or if the address change fails. */
+int sentinelRestoreMaster(sentinelRedisInstance *master) {
+    dictIterator *di;
+    dictEntry *de;
+    dict *old_slaves;
+    char slave_ip[256];
+    char master_ip[256];
+    int slave_port;
+    int master_port;
+
+    /* Validate lengths before copying: an unbounded strcpy() into a fixed
+     * size stack buffer would overflow on an oversized host string. */
+    if (master->slave_master_host == NULL ||
+        strlen(master->slave_master_host) >= sizeof(master_ip) ||
+        strlen(master->addr->ip) >= sizeof(slave_ip))
+    {
+        return REDIS_ERR;
+    }
+
+    /* Take private copies of both addresses before the address change.
+     * NOTE(review): presumably the reset below invalidates master->addr and
+     * master->slave_master_host, which is why copies are needed - confirm. */
+    strcpy(master_ip, master->slave_master_host);
+    master_port = master->slave_master_port;
+    strcpy(slave_ip, master->addr->ip);
+    slave_port = master->addr->port;
+
+    /* Detach the current slaves dict so that it can still be walked after
+     * the reset.
+     * NOTE(review): presumably sentinelResetMaster() releases
+     * master->slaves - confirm. */
+    old_slaves = master->slaves;
+    master->slaves = dictCreate(&instancesDictType,NULL);
+
+    if (sentinelResetMasterAndChangeAddress(master,master_ip,master_port)
+        != REDIS_OK)
+    {
+        /* Nothing was switched: drop the placeholder dict and put the
+         * original slave list back instead of silently losing it. */
+        dictRelease(master->slaves);
+        master->slaves = old_slaves;
+        return REDIS_ERR;
+    }
+
+    /* The address we were monitoring as master is now a slave. Creation is
+     * best effort, so the returned instance is intentionally not checked. */
+    createSentinelRedisInstance(NULL,SRI_SLAVE,slave_ip,slave_port,
+                                master->quorum,master);
+
+    /* Register the previously known slaves again, skipping the one that is
+     * the new master. */
+    di = dictGetIterator(old_slaves);
+    while((de = dictNext(di)) != NULL) {
+        sentinelRedisInstance *ri = dictGetVal(de);
+
+        if (ri->addr->port == master_port &&
+            strcmp(ri->addr->ip,master_ip) == 0) continue;
+
+        createSentinelRedisInstance(NULL,SRI_SLAVE,ri->addr->ip,
+                                    ri->addr->port,master->quorum,master);
+    }
+    dictReleaseIterator(di);
+    dictRelease(old_slaves);
+    return REDIS_OK;
+}
+
+
/* Return non-zero if there was no SDOWN or ODOWN error associated to this
* instance in the latest 'ms' milliseconds. */
int sentinelRedisInstanceNoDownFor(sentinelRedisInstance *ri, mstime_t ms) {
@@ -1485,12 +1530,17 @@ void sentinelRefreshInstanceInfo(sentinelRedisInstance *ri, const char *info) {
* master, always. */
if ((ri->flags & SRI_MASTER) && role == SRI_SLAVE && ri->slave_master_host)
{
- sentinelEvent(REDIS_WARNING,"+redirect-to-master",ri,
- "%s %s %d %s %d",
- ri->name, ri->addr->ip, ri->addr->port,
- ri->slave_master_host, ri->slave_master_port);
- sentinelResetMasterAndChangeAddress(ri,ri->slave_master_host,
- ri->slave_master_port);
+ if (ri->slaves == NULL) {
+ sentinelEvent(REDIS_WARNING,"+redirect-to-master",ri,
+ "%s %s %d %s %d",
+ ri->name, ri->addr->ip, ri->addr->port,
+ ri->slave_master_host, ri->slave_master_port);
+ sentinelResetMasterAndChangeAddress(ri,ri->slave_master_host,
+ ri->slave_master_port);
+ } else {
+ sentinelRestoreMaster(ri);
+ }
+
return; /* Don't process anything after this event. */
}
Something went wrong with that request. Please try again.