Skip to content
This repository
Browse code

Merge pull request #91 from fzaninotto/alert-threshold-for-checkevents

Alert threshold for checkevents
  • Loading branch information...
commit 5242b7f6cfc6cabf1da07a10f78fd95de7071e03 2 parents d18fccc + dcca4ce
Francois Zaninotto authored January 20, 2013
11  app/dashboard/views/_checkDetails.ejs
@@ -32,12 +32,21 @@
32 32
         <label class="control-label">Slow threshold</label>
33 33
         <div class="controls">
34 34
           <div class="input-append">
35  
-            <input type="text" name="check[maxTime]" value="<%= check.maxTime %>" class="span2" />
  35
+            <input type="text" name="check[maxTime]" value="<%= check.maxTime %>" class="span1" />
36 36
             <span class="add-on">ms</span>
37 37
           </div>
38 38
         </div>
39 39
       </div>
40 40
       <div class="control-group">
  41
+        <label class="control-label">Alert Threshold</label>
  42
+        <div class="controls">
  43
+          <div class="input-append">
  44
+            <input type="text" name="check[nbErrors]" value="<%= check.nbErrors %>" class="span1" />
  45
+            <span class="add-on">failed pings</span>
  46
+          </div>
  47
+        </div>
  48
+      </div>
  49
+      <div class="control-group">
41 50
         <label class="control-label">Tags</label>
42 51
         <div class="controls">
43 52
           <textarea name="check[tags]" rows="2"/><%= check.tags.join(', ') %></textarea>
54  models/check.js
@@ -19,6 +19,8 @@ var Check = new Schema({
19 19
   url         : String,
20 20
   interval    : { type: Number, default: 60000 }, // interval between two pings
21 21
   maxTime     : { type: Number, default: 1500 },  // time under which a ping is considered responsive
  22
+  alertTreshold : { type: Number, default: 1 },   // nb of errors from which to trigger a new CheckEvent
  23
+  errorCount  : { type: Number, default: 0 },     // count number of errors
22 24
   tags        : [String],
23 25
   lastChanged : Date,
24 26
   firstTested : Date,
@@ -73,11 +75,22 @@ Check.methods.togglePause = function() {
73 75
 
74 76
 Check.methods.setLastTest = function(status, time, error) {
75 77
   var now = time ? new Date(time) : new Date();
  78
+  var mustNotifyEvent = this.mustNotifyEvent(status);
  79
+
76 80
   if (!this.firstTested) {
77 81
     this.firstTested = now;
78 82
   }
  83
+
79 84
   this.lastTested = now;
  85
+
80 86
   if (this.isUp != status) {
  87
+    this.lastChanged = now;
  88
+    this.isUp = status;
  89
+    this.uptime = 0;
  90
+    this.downtime = 0;
  91
+  }
  92
+
  93
+  if (mustNotifyEvent) {
81 94
     var event = new CheckEvent({
82 95
       timestamp: now,
83 96
       check: this,
@@ -90,10 +103,7 @@ Check.methods.setLastTest = function(status, time, error) {
90 103
       event.downtime = now.getTime() - this.lastChanged.getTime();
91 104
     }
92 105
     event.save();
93  
-    this.lastChanged = now;
94  
-    this.isUp = status;
95  
-    this.uptime = 0;
96  
-    this.downtime = 0;
  106
+    this.markEventNotified();
97 107
   }
98 108
   var durationSinceLastChange = now.getTime() - this.lastChanged.getTime();
99 109
   if (status) {
@@ -104,6 +114,42 @@ Check.methods.setLastTest = function(status, time, error) {
104 114
   return this;
105 115
 };
106 116
 
  117
// Tell whether the ping result `status` (truthy = up, falsy = down) should
// produce a new notification event.
//
// This is not a pure predicate: while the check is down it also maintains
// this.errorCount. It compares `status` with the current this.isUp, so it has
// to run before isUp is updated for the ping being processed.
//
// Returns true when:
//   - the check has never been tested (firstTested is not set), or
//   - the check is down and errorCount equals alertTreshold, or
//   - the check comes back up while errorCount is above alertTreshold.
// Returns false in every other case.
Check.methods.mustNotifyEvent = function(status) {
  // no previous test recorded: always notify
  if (!this.firstTested) return true;

  var statusChanged = this.isUp != status;

  if (status) {
    // check is up: only notify a recovery when the status flipped and the
    // error count is above the alert threshold
    return statusChanged && this.errorCount > this.alertTreshold;
  }

  // check is down
  if (statusChanged) {
    // first down ping of a new outage: restart the count
    this.errorCount = 1;
  }

  if (this.errorCount < this.alertTreshold) {
    // still below the alert threshold: count this down ping and stay quiet
    this.errorCount++;
    return false;
  }

  // notify only when the count sits exactly on the threshold; a higher count
  // is the state left by markEventNotified(), i.e. nothing more to send
  return this.errorCount === this.alertTreshold;
};
  147
+
  148
// Record that the current event has been notified by pushing errorCount one
// step above alertTreshold. mustNotifyEvent() returns false for a down ping
// whose count is above the threshold, so a following ping with the same
// status does not trigger a second notification.
Check.methods.markEventNotified = function() {
  var countAboveTreshold = this.alertTreshold + 1;
  this.errorCount = countAboveTreshold;
};
  152
+
107 153
 Check.methods.getQosPercentage = function() {
108 154
   if (!this.qos) return false;
109 155
   return (this.qos.ups / this.qos.count) * 100;

0 notes on commit 5242b7f

Please sign in to comment.
Something went wrong with that request. Please try again.