Skip to content

Commit

Permalink
Merge f1c9601 into 7e70c43
Browse files (browse the repository at this point in the history)
  • Loading branch information
buger committed Jan 8, 2019
2 parents 7e70c43 + f1c9601 commit 4106a72
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 10 deletions.
28 changes: 20 additions & 8 deletions host_checker.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,8 +112,21 @@ func (h *HostUptimeChecker) HostReporter() {
case okHost := <-h.okChan:
// Clear host from unhealthylist if it exists
if h.unHealthyList[okHost.CheckURL] {
h.upCallback(okHost)
delete(h.unHealthyList, okHost.CheckURL)
newVal := 1
if count, found := h.sampleCache.Get(okHost.CheckURL); found {
newVal = count.(int) - 1
}

if newVal <= 0 {
// Reset the count
h.sampleCache.Delete(okHost.CheckURL)
log.Warning("[HOST CHECKER] [HOST UP]: ", okHost.CheckURL)
h.upCallback(okHost)
delete(h.unHealthyList, okHost.CheckURL)
} else {
log.Warning("[HOST CHECKER] [HOST UP BUT NOT REACHED LIMIT]: ", okHost.CheckURL)
h.sampleCache.Set(okHost.CheckURL, newVal, cache.DefaultExpiration)
}
}
go h.pingCallback(okHost)

Expand All @@ -123,16 +136,15 @@ func (h *HostUptimeChecker) HostReporter() {
newVal = count.(int) + 1
}

h.sampleCache.Set(failedHost.CheckURL, newVal, cache.DefaultExpiration)

if newVal >= h.sampleTriggerLimit {
log.Debug("[HOST CHECKER] [HOST WARNING]: ", failedHost.CheckURL)
// Reset the count
h.sampleCache.Set(failedHost.CheckURL, 1, cache.DefaultExpiration)
log.Warning("[HOST CHECKER] [HOST DOWN]: ", failedHost.CheckURL)
// track it
h.unHealthyList[failedHost.CheckURL] = true
// Call the custom callback hook
go h.failureCallback(failedHost)
} else {
log.Warning("[HOST CHECKER] [HOST DOWN BUT NOT REACHED LIMIT]: ", failedHost.CheckURL)
h.sampleCache.Set(failedHost.CheckURL, newVal, cache.DefaultExpiration)
}
go h.pingCallback(failedHost)

Expand Down Expand Up @@ -198,7 +210,7 @@ func (h *HostUptimeChecker) CheckHost(toCheck HostData) {
}

func (h *HostUptimeChecker) Init(workers, triggerLimit, timeout int, hostList map[string]HostData, failureCallback func(HostHealthReport), upCallback func(HostHealthReport), pingCallback func(HostHealthReport)) {
h.sampleCache = cache.New(30*time.Second, 5*time.Second)
h.sampleCache = cache.New(30*time.Second, 30*time.Second)
h.stopPollingChan = make(chan bool)
h.errorChan = make(chan HostHealthReport)
h.okChan = make(chan HostHealthReport)
Expand Down
2 changes: 1 addition & 1 deletion host_checker_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ func (hc *HostCheckerManager) OnHostDown(report HostHealthReport) {
log.WithFields(logrus.Fields{
"prefix": "host-check-mgr",
}).Debug("Update key: ", hc.getHostKey(report))
hc.store.SetKey(hc.getHostKey(report), "1", int64(hc.checker.checkTimeout+1))
hc.store.SetKey(hc.getHostKey(report), "1", int64(hc.checker.checkTimeout*hc.checker.sampleTriggerLimit))

spec := getApiSpec(report.MetaData[UnHealthyHostMetaDataAPIKey])
if spec == nil {
Expand Down
2 changes: 1 addition & 1 deletion host_checker_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ func TestHostChecker(t *testing.T) {
}

redisStore := GlobalHostChecker.store.(storage.RedisCluster)
if ttl, _ := redisStore.GetKeyTTL(PoolerHostSentinelKeyPrefix + testHttpFailure); int(ttl) != GlobalHostChecker.checker.checkTimeout+1 {
if ttl, _ := redisStore.GetKeyTTL(PoolerHostSentinelKeyPrefix + testHttpFailure); int(ttl) != GlobalHostChecker.checker.checkTimeout*GlobalHostChecker.checker.sampleTriggerLimit {
t.Error("HostDown expiration key should be checkTimeout + 1", ttl)
}
GlobalHostChecker.checkerMu.Unlock()
Expand Down

0 comments on commit 4106a72

Please sign in to comment.