refactoring and improvements for v1.0

commit 365b310f78d37990d563f2cfc3f5ac3705bf2c7c 1 parent 58f8660
@iloire authored
README.md (26 lines changed)
@@ -58,7 +58,6 @@ Make sure you install those dependencies by issuing the command:
}
]
}
-
b) **Define Postmark and notifications settings:**
exports.notifications = {
@@ -72,7 +71,7 @@ Make sure you install those dependencies by issuing the command:
c) **Run watchmen**
- $ node watchmen.js
+ $ node server.js
or, more likely, you will want to use **forever** to run it in the background
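For example (a minimal sketch, assuming the forever CLI is installed globally; it is not part of this commit):

    $ npm install -g forever
    $ forever start server.js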
@@ -86,6 +85,29 @@ Make sure you install those dependencies by issuing the command:
$ node test/runtests.js
+## Redis database schema
+
   <host>:<port>:<url>:status (hash): JSON object representing the status of that particular service:
+
+ - elapsed_time
+ - status
+ - avg_response_time
+ - lastok
+   - down_time: timestamp set when the site goes down.
+
   <host>:<port>:<url>:events:error (list): list of errors for that particular url (entries are stored as JSON objects)
+
   <host>:<port>:<url>:events:warning (list): list of warnings for that particular url (entries are stored as JSON objects)
+
   <host>:<port>:<url>:<day_timestamp>:errors_by_minute: (sorted set): minutes of that particular day in which at least one error occurred. With this we can easily calculate downtime by counting how many minutes of a given day are flagged with an error.
+
   <host>:<port>:<url>:<day_timestamp>:warning_by_minute: (sorted set): same as above, but for warnings
+
   <host>:<port>:<url>:<day_timestamp>:avg_response_time: (sorted set): average response time per minute for this day. item: minute_time_stamp, score: avg_response_time
+
   <host>:<port>:<url>:<day_timestamp>:avg_response_time:counter (int): number of requests in this particular minute (used to calculate the average response time per minute)
+
+
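For illustration, a minimal sketch (not part of this commit) of reading the status hash with the node redis client, following the key layout above; the host, port and url values are placeholder examples:

    // read_status.js - illustrative only; connection settings mirror webserver/app.js
    var config = require('./config');
    var redis = require('redis').createClient(config.database.port, config.database.host);
    redis.select(config.database.db);

    var key = ['www.google.com', '80', '/', 'status'].join(':'); // <host>:<port>:<url>:status

    redis.hgetall(key, function (err, status) {
      if (err) throw err;
      // status.status: 1 = ok, 0 = error
      // status.avg_response_time: rolling average response time in milliseconds
      // status.lastok / status.lasterror / status.lastwarning: timestamps of the last events
      console.log(status);
      redis.quit();
    });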
## History
**0.9**
config.js (10 lines changed)
@@ -6,7 +6,7 @@
- when a site is down, the ping interval, in seconds, until the site is back up again
*/
-var one_minute = 60 //you can set this to other value for testing the service in dev env.
+var one_minute = 6 //you can set this to other value for testing the service in dev env.
var performance_test = false //set to true if you want a bulk config file to be used for testing instead of the following list of hosts
exports.database = { port: 1216, host : '127.0.0.1', db: 'watchmen' }
@@ -121,7 +121,7 @@ var hosts =
ping_interval: one_minute,
failed_ping_interval: one_minute,
     warning_if_takes_more_than: 3000, //milliseconds
- enabled: true,
+ enabled: false,
urls : [
{
method: 'get',
@@ -139,7 +139,7 @@ var hosts =
ping_interval: 3 * one_minute,
failed_ping_interval: one_minute,
     warning_if_takes_more_than: 3000, //milliseconds
- enabled: true,
+ enabled: false,
urls : [
{
method: 'get',
@@ -180,7 +180,7 @@ var hosts =
timeout:10000,
ping_interval: one_minute,
failed_ping_interval: one_minute,
- enabled: true,
+ enabled: false,
urls : [
{
method: 'get',
@@ -248,7 +248,7 @@ var hosts =
ping_interval: one_minute,
failed_ping_interval: one_minute,
alert_to: ['ivan@iloire.com'],
- enabled: false,
+ enabled: true,
urls : [
{
method: 'get',
lib/request.js (36 lines changed)
@@ -27,42 +27,6 @@ var http = require('http');
var https = require('https');
var request = require ('request');
-/*
-//we can do this using request also
-function processRequestUsingRequestModule (url_conf, callback){
- // record start time
- var startTime = new Date();
- var headers = {
- 'Host': url_conf.host.host
- };
-
- if (url_conf.method == 'post'){
- headers['Content-Type'] = url_conf.content_type
- headers['Content-Length'] = JSON.stringify(url_conf.input_data || '').length
- }
-
- var options = {
- url : (url_conf.host.protocol || 'http') + '://' + url_conf.host.host + ':' + (url_conf.host.port || 80) + url_conf.url,
- timeout: url_conf.timeout || 10000,
- method: url_conf.method
- }
-
- if (!url_conf.expected || !url_conf.expected.contains){
- options.method = "HEAD";
- }
-
- request(options, function (error, response, body) {
- var timeDiff = (new Date() - startTime);
- if (!error) {
- callback(null, body, response, timeDiff)
- }
- else{
- callback(error, null, null, null);
- }
- })
-}
-*/
-
function processRequestUsingHttp (url_conf, callback){
// record start time
var startTime = new Date();
lib/util.js (33 lines changed)
@@ -27,6 +27,39 @@ var ONE_DAY_MS = ONE_HOUR_MS * 24
function round (val){ return (val<10) ? val = '0' + val : val; }
+function get_day_date_str (ndate){
+ var date = new Date(parseFloat(ndate))
+ return date.getDate() + "_" + (date.getMonth() + 1) + "_" + date.getFullYear()
+}
+exports.get_day_date_str = get_day_date_str;
+
+function get_hour_str (ndate){
+ var date = new Date(parseFloat(ndate))
+ return get_day_date_str(ndate) + '_' + date.getHours()
+}
+exports.get_hour_str = get_hour_str;
+
+function get_minute_str (ndate){
+ var date = new Date(parseFloat(ndate))
+ return get_hour_str(ndate) + '_' + date.getMinutes()
+}
+exports.get_minute_str = get_minute_str;
+
+function pad_date_to_minute_str (ndate){
+ var date = new Date(parseFloat(ndate))
+ var date_padded = new Date(date.getFullYear(), date.getMonth(), date.getDate(), date.getHours(), date.getMinutes(), 0, 0);
+ return date_padded.getTime();
+}
+exports.pad_date_to_minute_str = pad_date_to_minute_str;
+
+function pad_date_to_hour_str (ndate){
+ var date = new Date(parseFloat(ndate))
+ var date_padded = new Date(date.getFullYear(), date.getMonth(), date.getDate(), date.getHours(), 0, 0, 0);
+ return date_padded.getTime();
+}
+exports.pad_date_to_hour_str = pad_date_to_hour_str;
+
+
function extraTimeInfo (ndate){
if (!ndate) return "";
var date = new Date(parseFloat(ndate))
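For reference, the new helpers map a timestamp to day/hour/minute bucket keys; example outputs derived from the functions above (using a local-time date of 14 March 2012, 10:05:30):

    var util = require('./lib/util');
    var ts = new Date(2012, 2, 14, 10, 5, 30).getTime();
    util.get_day_date_str(ts);       // "14_3_2012"
    util.get_hour_str(ts);           // "14_3_2012_10"
    util.get_minute_str(ts);         // "14_3_2012_10_5"
    util.pad_date_to_minute_str(ts); // same timestamp with seconds and milliseconds zeroed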
lib/watchmen.js (447 lines changed)
@@ -22,11 +22,12 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
-var postmark= require('./postmark');
-var sys = require('util');
-var colors = require('colors');
-var config = require ('../config');
+var postmark = require('./postmark')
+var sys = require('util')
+var colors = require('colors')
+var config = require ('../config')
var util = require ('./util')
+var email_service = require ('./email')
function log_info (str){ sys.puts (str) }
exports.log_info = log_info
@@ -42,13 +43,12 @@ exports.log_warning=log_warning
function $() { return Array.prototype.slice.call(arguments).join(':') }
-var ONE_HOUR_MS = 1000 * 60 * 60
-var ONE_DAY_MS = ONE_HOUR_MS * 24
-
-var ONE_DAY_SECS = 60 * 60 * 24
-
var current_requests = 0
+function get_url_info (host_conf, url_conf){
+ return host_conf.name + ' - ' + host_conf.host + ':'+ host_conf.port + url_conf.url + ' [' + url_conf.method + ']'
+}
+
function get_hosts (redis, hosts, callback){
var multi = redis.multi()
for (var i=0; i<hosts.length;i++){
@@ -64,179 +64,201 @@ function get_hosts (redis, hosts, callback){
multi.exec(function(err, replies) {
if (err){
- callback (err);
+ return callback (err);
}
- else{
- var counter=0
- for (i=0;i<hosts.length;i++) {
- //remove unused host fields
- delete hosts[i].alert_to
- for (var u=0;u<hosts[i].urls.length;u++){
- //remove unused url fields
- delete hosts[i].urls[u].expected
- delete hosts[i].urls[u].method
-
- //config
- hosts[i].urls[u].ping_interval = hosts[i].urls[u].ping_interval || hosts[i].ping_interval
- hosts[i].urls[u].warning_if_takes_more_than = hosts[i].urls[u].warning_if_takes_more_than || hosts[i].warning_if_takes_more_than || 0
-
- //reset fields
- hosts[i].urls[u].lastfailure = hosts[i].urls[u].lastfailuretime = null;
- hosts[i].urls[u].lastok = hosts[i].urls[u].lastoktime = null;
- hosts[i].urls[u].lastwarning = hosts[i].urls[u].lastwarningtime = null;
-
- hosts[i].urls[u].avg_response_time = null;
+
+ var counter=0
+ for (i=0;i<hosts.length;i++) {
+ //remove unused host fields
+ delete hosts[i].alert_to
+ for (var u=0;u<hosts[i].urls.length;u++){
+ //remove unused url fields
+ delete hosts[i].urls[u].expected
+ delete hosts[i].urls[u].method
+
+ var status = replies[counter];
+
+ //config
+ hosts[i].urls[u].ping_interval = hosts[i].urls[u].ping_interval || hosts[i].ping_interval
+ hosts[i].urls[u].warning_if_takes_more_than = hosts[i].urls[u].warning_if_takes_more_than || hosts[i].warning_if_takes_more_than || 0
+
+ hosts[i].urls[u].avg_response_time = Math.round(status.avg_response_time) || null;
+
+ if (!(hosts[i].urls[u].enabled==false || hosts[i].enabled==false)){ //enabled
+ hosts[i].urls[u].status = (status.status==0) ? "error" : "ok" ; //will show green while collecting data
+ }
+ else{
hosts[i].urls[u].status = "disabled";
-
- var status = replies[counter];
- if (status && status.status){
- //last failure
- if (status.lasterror){
- hosts[i].urls[u].lastfailure = ISODateOrEmpty(status.lasterror);
- hosts[i].urls[u].lastfailuretime = util.extraTimeInfo(status.lasterror)
- }
- if (status.lastok){
- hosts[i].urls[u].lastok = ISODateOrEmpty(status.lastok);
- hosts[i].urls[u].lastoktime = util.extraTimeInfo(status.lastok)
- }
- if (status.lastwarning){
- hosts[i].urls[u].lastwarning = ISODateOrEmpty(status.lastwarning);
- hosts[i].urls[u].lastwarningtime = util.extraTimeInfo(status.lastwarning)
- }
- //avg response
- hosts[i].urls[u].avg_response_time = Math.round(status.avg_response_time) || "-";
-
- if (!(hosts[i].urls[u].enabled==false || hosts[i].enabled==false))
- hosts[i].urls[u].status = (status.status==0) ? "error" : "ok" ; //will show green while collecting data
- }
- counter++;
}
+
+ hosts[i].urls[u].lastfailure = (status && status.lasterror) ? ISODateOrEmpty(status.lasterror) : null;
+ hosts[i].urls[u].lastfailuretime = (status && status.lasterror) ? util.extraTimeInfo(status.lasterror) : null;
+
+ hosts[i].urls[u].lastok = (status && status.lastok) ? ISODateOrEmpty(status.lastok) : null;
+ hosts[i].urls[u].lastoktime = (status && status.lastok) ? util.extraTimeInfo(status.lastok) : null;
+
+ hosts[i].urls[u].lastwarning = (status && status.lastwarning) ? ISODateOrEmpty(status.lastwarning) : null;
+ hosts[i].urls[u].lastwarningtime = (status && status.lastwarning) ? util.extraTimeInfo(status.lastwarning) : null;
+
+ counter++;
}
- callback(null, hosts)
}
+ callback(err, hosts)
});
}
-exports.get_hosts=get_hosts
-
-function log_general_error (redis, url_conf, msg, callback) {
- var timestamp = new Date().getTime();
- var expiration = ONE_DAY_SECS * 30; //in seconds
- multi.lpush('errors', timestamp.toISOString() + ":" + msg, function(err, data){
- callback(err)
- });
+exports.get_hosts = get_hosts
+
+
+function get_url_status (url_conf, redis, callback) {
+ redis.hgetall ($(url_conf.host.host, url_conf.host.port, url_conf.url, 'status'), callback);
}
-function createEvent(redis, url_conf, request_status, event_type, timestamp, expiration, callback){
- var timestamp = new Date().getTime();
- var multi=redis.multi()
- multi.lpush($(url_conf.host.host, url_conf.host.port, url_conf.url, 'events'), timestamp); //prepend to list of errors
- var key = $(url_conf.host.host, url_conf.host.port, url_conf.url, 'event', timestamp);
- multi.hmset(key,
- 'timestamp', timestamp,
- 'event_type', event_type,
- 'msg', request_status.msg || request_status.warning || request_status.error || "-"
- );
- multi.expire (key, expiration);
-
- multi.exec(function(err, replies) {
- callback(err);
+/*
+ - status: 1 = ok, 0 = error
+*/
+//url_conf, status, msg, event_type, request_status
+function update_status(redis, params, callback){
+ if (!params.url_conf || !params.request_status || !params.request_status.timestamp){
+ return callback ('bad parameters');
+ }
+
+ params.event_type = (params.request_status.status) ? 'success' : 'error';
+
+ var minute_time_stamp = util.get_minute_str(params.request_status.timestamp);
+ var hour_time_stamp = util.get_hour_str(params.request_status.timestamp);
+
+ var url_key_prefix = $(params.url_conf.host.host, params.url_conf.host.port, params.url_conf.url);
+
+ redis.zscore ($(url_key_prefix, util.get_day_date_str(params.request_status.timestamp), 'avg_response_time'), hour_time_stamp, function (err, avg_hour){
+ if (err) return callback (err);
+
+ redis.get ($(url_key_prefix, util.get_day_date_str(params.request_status.timestamp), 'avg_response_time', hour_time_stamp, 'counter'), function (err, counter){
+ if (err) return callback (err);
+
+ var multi = redis.multi()
+
+ //save events
+ for (var i = 0, l = params.request_status.events.length; i < l ; i++) {
+ var event_obj = {timestamp : params.request_status.timestamp, event_type: params.request_status.events[i].type, msg : params.request_status.events[i].msg }
+ multi.lpush($(url_key_prefix, 'events', params.request_status.events[i].type), JSON.stringify(event_obj));
+ };
+
+ //update status
+ var key_status = $(url_key_prefix, 'status');
+
+ multi.hmset(key_status, 'status', params.request_status.status);
+ multi.hmset(key_status, 'last' + params.event_type, params.request_status.timestamp);
+
+ if (params.event_type == 'success') {
+
+          //todo: could the global avg be calculated without a separate computation, just by taking the last few minutes?
+ //global avg
+ multi.hmset(key_status, 'avg_response_time', params.request_status.avg_response_time);
+ multi.hincrby(key_status, 'responses_count',1); //so we can calculate avg of all requests
+
+ //calc avg response time per hour
+ avg_hour = Math.round(((avg_hour || 0) * parseInt(counter || 0)
+ + params.request_status.elapsed_time) / (parseInt(counter || 0) + 1));
+
+ multi.incr ($(url_key_prefix, util.get_day_date_str(params.request_status.timestamp), 'avg_response_time', hour_time_stamp, 'counter'));
+ multi.zadd ($(url_key_prefix, util.get_day_date_str(params.request_status.timestamp), 'avg_response_time'), avg_hour, hour_time_stamp);
+
+ multi.hdel(key_status, 'down_timestamp');
+
+ if (params.request_status.warning){
+ multi.sadd ($(url_key_prefix, util.get_day_date_str(params.request_status.timestamp), 'warning_by_minute'), minute_time_stamp);
+ multi.hmset(key_status, 'lastwarning' , params.request_status.timestamp);
+ }
+ }
+ else if (params.event_type == 'error'){
+ multi.hmset(key_status, 'down_timestamp', params.request_status.down_timestamp);
+ multi.sadd ($(url_key_prefix, util.get_day_date_str(params.request_status.timestamp), 'error_by_minute'), minute_time_stamp);
+ }
+ else
+ { throw 'bad event_type' }
+
+ multi.exec(function(err, replies) {
+ callback(err);
+ });
+ });
});
}
-function record_warning (redis, url_conf, request_status, callback){
- var timestamp = new Date().getTime();
- var expiration = ONE_DAY_SECS * 10; //in seconds
- var key = $(url_conf.host.host, url_conf.host.port, url_conf.url, 'status');
- redis.hmset(key,
- 'status', 1,
- 'lastwarning', timestamp,
- function (err, data){
- if (!err)
- createEvent(redis, url_conf, request_status, 'warnings', timestamp, expiration, callback)
- }
- );
-}
+/* Will trigger when service takes more to reply than "limit" */
+function on_warning (params){
-function record_failure (redis, url_conf, request_status, callback){
- var timestamp = new Date().getTime();
- var expiration = ONE_DAY_SECS * 30; //in seconds
- redis.hmset($(url_conf.host.host, url_conf.host.port, url_conf.url, 'status'),
- 'status', 0,
- 'down_timestamp', timestamp,
- 'lasterror', timestamp,
- function (err,data){
- if (!err)
- createEvent(redis, url_conf, request_status, 'error', timestamp, expiration, callback)
- }
- );
}
-function record_success (redis, url_conf, request_status, callback){
- var timestamp = new Date().getTime();
- var expiration = ONE_DAY_SECS * 2; //in seconds
- var key = $(url_conf.host.host, url_conf.host.port, url_conf.url, 'status');
- redis.hgetall (key, function (err, status_data){
- var avg_response_time = 0;
- if (status_data){
- avg_response_time = ((status_data.avg_response_time || 0) * (status_data.responses_count || 0)
- + request_status.elapsed_time) / (parseInt(status_data.responses_count || 0) + 1);
- }
- else{
- avg_response_time = request_status.elapsed_time;
- }
- redis.hmset(key,
- 'status', 1,
- 'avg_response_time', avg_response_time,
- 'lastok', timestamp,
- function (err, data){
- redis.hincrby(key, 'responses_count',1, function (err, responses_count){
- if (!err){
- if (request_status.msg){
- createEvent(redis, url_conf, request_status, 'ok', timestamp, expiration, callback)
- }
- else
- callback(null);
- }
- else{
- callback(err);
- }
- });
- }
- );
- });
+/* Will trigger when the site is down or conditions are not satisfied */
+function on_failure (params){
+ var url_info = get_url_info (params.url_conf.host, params.url_conf)
+
+ var info = url_info + ' down!. Error: ' + params.error + '. Retrying in ' + params.request_status.next_attempt_secs / 60 + ' minute(s)..';
+ log_error (info);
+
+ if (params.request_status.previous_state.status != 1 && config.notifications.Enabled){
+ email_service.sendEmail(
+ params.url_conf.alert_to || params.url_conf.host.alert_to || config.notifications.To,
+        url_info + ' is down!', url_info + ' is down!. Reason: ' + params.error, function (err, data){
+          if (err){
+            console.error (params.url_conf + ': error sending email: ' + JSON.stringify(err));
+ }
+ });
+ }
+ else{
+ log_info ('Notification disabled or not triggered this time (site down) for ' + url_info);
+ }
}
-function sendEmail (to_list, subject, body, callback){
- postmark.sendEmail(
- {
- 'From' : config.notifications.postmark.From,
- 'To': to_list.join(','),
- 'Subject': subject,
- 'TextBody': body }, config.notifications.postmark.Api_key, function(err, data) {
-
- if (err) {
- callback (err, null);
- } else {
- callback (null,'Email sent successfully to ' + to_list.join(','))
- }
- })
+/* Will trigger when the site was down and gets back online */
+function on_site_back (params){
+ var url_info = get_url_info (params.url_conf.host, params.url_conf)
+ if (config.notifications.Enabled){
+ email_service.sendEmail(
+ params.url_conf.alert_to || params.url_conf.host.alert_to || config.notifications.To,
+ url_info + ' ' + params.msg, function(err, data){
+ if (err){
+ console.error (params.url_conf + ': error sending email: ' + JSON.stringify(err));
+ }
+ });
+ }
+ else{
+ log_info ('Notification disabled or not triggered this time (site back) for ' + url_info);
+ }
+
}
-function get_url_status (url_conf, redis, callback) {
- redis.hgetall ($(url_conf.host.host, url_conf.host.port, url_conf.url, 'status'), callback);
+/* Will trigger when the site responses successfully */
+function on_success (params){
+ //do something else here when site reports 'success'
+ var url_info = get_url_info (params.url_conf.host, params.url_conf)
+ log_ok (url_info + ' responded OK! (' + params.request_status.elapsed_time + ' milliseconds, avg: ' + params.request_status.avg_response_time + ')')
+
}
-function query_url(url_conf, redis, request, config, callback){
+function query_url(url_conf, redis, request, config, timestamp, callback){
var host_conf = url_conf.host
- var url_info = host_conf.name + ' - ' + host_conf.host + ':'+ host_conf.port + url_conf.url + ' [' + url_conf.method + ']'
- var request_status = {}
+ var url_info = get_url_info (url_conf.host, url_conf);
var error = null
- current_requests++;
+
+ current_requests++; //concurrent open requests
+
request (url_conf, function(request_err, body, response, elapsed_time){
current_requests--;
- console.log ('concurrent requests:' + current_requests);
- request_status.elapsed_time = elapsed_time
- get_url_status (url_conf, redis, function (err, status_data){
+ log_info ('concurrent requests:' + current_requests);
+
+ get_url_status (url_conf, redis, function (err, previous_state){
+ if (err) return callback (err);
+
+ var request_status = {
+ elapsed_time : elapsed_time,
+ timestamp : timestamp,
+ previous_state : previous_state,
+ events: []
+ };
+
+ //decide if service is down
+ var params = {url_conf:url_conf, request_status: request_status}
if (!request_err){ //request completed successfully
if (url_conf.expected){
if (response.statusCode != url_conf.expected.statuscode){
@@ -252,74 +274,49 @@ function query_url(url_conf, redis, request, config, callback){
error = 'Connection error when processing request: ' + request_err
}
- if (error){ //site down
- request_status.next_attempt_secs = url_conf.failed_ping_interval || url_conf.host.failed_ping_interval || 70;
- var info = url_info + ' down!. Error: ' + error + '. Retrying in ' + request_status.next_attempt_secs / 60 + ' minute(s)..';
- log_error (info);
- if ((status_data==null) || (status_data.status == 1 )){ //site down (first failure)
- if (config.notifications.Enabled){
- sendEmail(
- url_conf.alert_to || host_conf.alert_to || config.notifications.To,
- url_info + ' is down!', url_info + ' is down!. Reason: ' + error, function (err, data){
- if (err){
- log_general_error (redis, url_conf, 'Error sending email: ' + JSON.stringify(err))
- }
- });
- }
- else{
- log_info ('Notification disabled or not triggered this time');
- }
- }
- request_status.status=0
- request_status.msg = error
- record_failure (redis, url_conf, request_status, function (err){
- callback (err, request_status);
- })
- }
- else {
- //service up.
- var warning = null;
+ request_status.status = error ? 0 : 1
+
+ if (error){ //service down
+ //set new down timestamp if first hit when site down
+ request_status.down_timestamp = previous_state.down_timestamp ? previous_state.down_timestamp : request_status.timestamp;
+
+ if (previous_state.status != 0) //record event only if this is the first error for this service.
+ request_status.events.push({type: 'error' , msg: error});
- request_status.status=1
- request_status.next_attempt_secs = url_conf.ping_interval || url_conf.host.ping_interval;
+ on_failure (params);
+ }
+ else { //service is up
+ request_status.avg_response_time = Math.round(previous_state ? ((previous_state.avg_response_time || 0) * (previous_state.responses_count || 0)
+ + elapsed_time) / (parseInt(previous_state.responses_count || 0) + 1) : elapsed_time);
- //check for response time. warning?
var limit = url_conf.warning_if_takes_more_than || host_conf.warning_if_takes_more_than;
- if (limit){
- if (request_status.elapsed_time > (limit)){
- warning = 'Request for ' + url_info + ' took too much: ' + request_status.elapsed_time + ' milliseconds. Limit=' + limit + ' milliseconds'
- }
- }
-
- //service was down and now it is up again!
- if ((status_data != null) && (status_data.status == 0)){
- var down_time = Math.round((Date.now() - status_data.down_timestamp) / 1000);
- var info = url_info + ' is back!. Downtime: ' + down_time + ' seconds';
- if (config.notifications.Enabled){
- sendEmail(
- url_conf.alert_to || host_conf.alert_to || config.notifications.To,
- url_info + ' is back up!', info, function(err, data){
- if (err){
- log_general_error (redis, url_conf, 'Error sending email: ' + JSON.stringify(err))
- }
- });
- }
- request_status.msg = info;
+ if (limit && (elapsed_time > limit)){ //over the limit. warning!
+ request_status.events.push({type: 'warning' , msg: 'request for ' + url_info + ' took too much: ' + elapsed_time + ' milliseconds. Limit=' + limit + ' milliseconds'});
+ request_status.warning = true;
+
+ //todo: as warning, record just time instead of full text.
}
- //service is ok. log warning if that's the case
- log_ok (url_info + ' responded OK! (' + elapsed_time + ' milliseconds), body length:' + (body ? body.length : 0))
- record_success (redis, url_conf, request_status, function(err){
- if (warning){
- request_status.msg = warning;
- record_warning (redis, url_conf, request_status, function (err){
- callback (err, request_status);
- });
- }
- else
- callback (err, request_status);
- });
+ if ((previous_state != null) && (previous_state.status == 0)){ //service was down and now it is up again!
+ request_status.down_time = Math.round((timestamp - previous_state.down_timestamp) / 1000);
+ request_status.events.push({type: 'success' , msg: 'site is back! down_time: ' + request_status.down_time});
+
+ on_site_back (params);
+ }
+ else { //service is ok.
+ on_success (params);
+ }
}
+
+ //calculate next interval
+ request_status.next_attempt_secs = error ?
+ (url_conf.failed_ping_interval || url_conf.host.failed_ping_interval || 70) //interval if error
+ : (url_conf.ping_interval || url_conf.host.ping_interval) //interval if ok
+
+
+ update_status(redis, params, function (err){
+ callback (err, request_status);
+ })
});
})
}
@@ -327,10 +324,14 @@ exports.query_url = query_url
function processUrl (url_conf, redis){
var request = require ('./request');
- query_url(url_conf, redis, request.processRequest, config, function (err, request_status) {
- if (err)
- console.log (err)
- setTimeout (processUrl, request_status.next_attempt_secs * 1000, url_conf, redis);
+ var timestamp = new Date().getTime()
+ query_url(url_conf, redis, request.processRequest, config, timestamp, function (err, request_status) {
+ if (err){
+ console.error ('ERROR' + err);
+ }
+ else {
+ setTimeout (processUrl, request_status.next_attempt_secs * 1000, url_conf, redis);
+ }
})
}
exports.processUrl = processUrl
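Both the global average and the per-bucket average response time are maintained incrementally in this refactor; a standalone sketch of the recurrence (same arithmetic as the code above, shown only for clarity):

    // running mean: new_avg = round((old_avg * count + sample) / (count + 1))
    function runningAvg(oldAvg, count, sample) {
      return Math.round(((oldAvg || 0) * parseInt(count || 0, 10) + sample) / (parseInt(count || 0, 10) + 1));
    }
    runningAvg(null, 0, 300); // 300
    runningAvg(300, 1, 500);  // 400
    runningAvg(400, 2, 700);  // 500 - matches the 300/500/700 progression exercised in the tests below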
package.json (22 lines changed)
@@ -1,6 +1,9 @@
{
"author": "Iván Loire <ivan@iloire.com> (http://iloire.com/)",
"name": "watchmen",
+ "scripts": {
+ "test": "node tests/runtests.js"
+ },
"description": "A Node.js HTTP monitor",
"version": "0.5.0",
"homepage": "http://letsnode.com",
@@ -9,16 +12,17 @@
"url": "git://github.com/iloire/WatchMen.git"
},
"engines": {
- "node": ">=0.6.0"
+ "node": "0.6.x"
},
"dependencies": {
- "colors": ">=0.6.0",
- "ejs": ">=0.6.1",
- "express": ">=2.5.6",
- "redis": ">=0.7.1",
- "async": ">=0.1.15",
- "assert": ">=0.4.9",
- "request": ">=2.9.3"
+ "colors": "0.6.x",
+ "ejs": "0.7.x",
+ "express": "2.5.x",
+ "redis": "0.7.x",
+ "async": "0.1.x",
+ "request": "2.9.x"
},
- "devDependencies": {}
+ "devDependencies": {
+ "assert": "0.4.x"
+ }
}
redis.conf (2 lines changed)
@@ -1,5 +1,5 @@
port 1216
daemonize yes
dbfilename watchmen.rdb
-save 300 1
+save 60 1
save 60 100
server.js (2 lines changed)
@@ -33,7 +33,7 @@ var watchmen = require ('./lib/watchmen')
/*main*/
watchmen.log_info ('\nstarting watchmen...')
-watchmen.log_info ('reading configuration and queuing hosts for pinging...')
+watchmen.log_info ('reading configuration and queuing hosts...')
var initial_delay=0;
for (var i=0; i<config.hosts.length;i++){
tests/runtests.js (519 lines changed)
@@ -1,4 +1,6 @@
var watchmen = require ('../lib/watchmen.js')
+var util = require ('../lib/util.js')
+var reports = require ('../lib/reports.js')
var config = require ('../config.js')
var async = require ('async')
var assert = require ('assert')
@@ -15,12 +17,13 @@ function printCurrentTest() {
}
var request_mocked = require ('./lib/request_mocked')
+var timestamp = util.pad_date_to_minute_str(new Date().getTime());
-var tests= [
+var tests = [
function setup_tests(callback){
printCurrentTest();
console.log ("-----------------")
- redis.flushall(function(err, data){
+ redis.flushall(function(err, data){ //clear database
callback(null,null);
});
}
@@ -36,29 +39,48 @@ var tests= [
expected : {statuscode: 200, contains: ''}
}
- var request_mocked = require ('./lib/request_mocked')
- request_mocked.mocked_response = {error: 'error', body : null, response : null, timeDiff : 0};
+ var minute_time_stamp = util.get_minute_str(timestamp);
- watchmen.query_url(url, redis, request_mocked.processRequest, config, function(err, data){
+ request_mocked.mocked_response = {error: 'error', body : null, response : null, timeDiff : 0};
+
+ watchmen.query_url(url, redis, request_mocked.processRequest, config, timestamp, function(err, request_status){
assert.ok (!err)
- assert.equal (data.status, 0)
- assert.ok (data.msg.indexOf('Connection error')>-1)
- assert.ok (data.next_attempt_secs, 4);
- assert.equal (data.next_attempt_secs, url.failed_ping_interval);
+ assert.equal (request_status.status, 0)
+ assert.equal (request_status.next_attempt_secs, url.failed_ping_interval);
- redis.hgetall ($(url.host.host, url.host.port, url.url, 'status'), function (err,data){
- assert.ok (!data.lastwarning)
- assert.ok (!data.lastok)
- assert.ok (data.lasterror)
-
+ //check status
+ redis.hgetall ($(url.host.host, url.host.port, url.url, 'status'), function (err,status){
assert.ok (!err)
- assert.equal (data.status,0)
- redis.lrange ($(url.host.host, url.host.port, url.url, 'events'), 0, 100, function(err, timestamps) {
+
+ assert.ok (!status.lastwarning)
+ assert.ok (!status.lastsuccess)
+ assert.equal (status.lasterror, timestamp)
+ assert.equal (status.down_timestamp, timestamp);
+ assert.equal (status.status,0)
+
+ //one event
+ redis.lrange ($(url.host.host, url.host.port, url.url, 'events','error'), 0, 100, function(err, events) {
assert.ok (!err)
- assert.equal (timestamps.length, 1)
- redis.hgetall ($(url.host.host, url.host.port, url.url, 'event', timestamps[0]), function (err, event_data){
- assert.ok (event_data.msg.indexOf('error')>-1);
- callback (null,null)
+ assert.equal (events.length, 1)
+ var event_obj = JSON.parse(events[0])
+ assert.equal (event_obj.event_type, 'error')
+
+ //one error
+ redis.scard ($(url.host.host, url.host.port, url.url, util.get_day_date_str(timestamp), 'error_by_minute'), function(err, number_values){
+ assert.ok (!err)
+ assert.equal(number_values,1);
+
+ //no warnings
+ redis.scard ($(url.host.host, url.host.port, url.url, util.get_day_date_str(timestamp), 'warning_by_minute'), function(err, number_values){
+ assert.ok (!err)
+ assert.equal(number_values,0);
+
+ redis.get ($(url.host.host, url.host.port, url.url, util.get_day_date_str(timestamp), 'avg_response_time', minute_time_stamp, 'counter'), function(err, counter){
+ assert.ok (!err)
+ assert.ok (!counter)
+ callback (null,null)
+ });
+ });
});
});
});
@@ -76,35 +98,136 @@ var tests= [
expected : {statuscode: 200, contains: 'hola'}
}
- var request_mocked = require ('./lib/request_mocked')
+ var minute_time_stamp = util.get_minute_str(timestamp);
+ var hour_time_stamp = util.get_hour_str (timestamp);
+
request_mocked.mocked_response = {error: null, body : 'hola', response : {statusCode: 200}, timeDiff : 300};
-
- watchmen.query_url(url, redis, request_mocked.processRequest, config, function(err, data){
- assert.ok (!err)
+
+ watchmen.query_url(url, redis, request_mocked.processRequest, config, timestamp, function(err, data){
+ assert.ok (!err, err)
+
assert.equal (data.status, 1)
- assert.ok (data.msg==null);
- assert.ok (data.next_attempt_secs, 60);
- assert.ok (data.elapsed_time, 230)
+ assert.equal (data.msg,null);
+ assert.equal (data.next_attempt_secs, 60);
+ assert.ok (data.elapsed_time, 'Elapsed time not found')
assert.equal (data.next_attempt_secs, url.ping_interval);
-
- redis.hgetall ($(url.host.host, url.host.port, url.url, 'status'), function (err,data){
+ assert.ok (!data.down_timestamp, data.down_timestamp)
+
+ redis.hgetall ($(url.host.host, url.host.port, url.url, 'status'), function (err, data){
+ assert.ok (!err)
+
assert.ok (!data.lastwarning)
- assert.ok (data.lastok)
+ assert.ok (data.lastsuccess)
assert.equal (data.lasterror)
- assert.ok (!err)
assert.equal (data.status,1)
assert.equal (data.avg_response_time, 300)
- redis.lrange ($(url.host.host, url.host.port, url.url, 'events'), 0, 100, function(err, timestamps) {
+
+ redis.lrange ($(url.host.host, url.host.port, url.url, 'events','error'), 0, 100, function(err, events) {
assert.ok (!err)
- assert.equal (timestamps.length, 0)
- callback (null,null)
+ assert.equal (events.length, 0)
+
+ //no errors
+ redis.scard ($(url.host.host, url.host.port, url.url, util.get_day_date_str(timestamp), 'error_by_minute'), function(err, number_values){
+ assert.ok (!err)
+ assert.equal(number_values,0);
+
+ //no warnings
+ redis.scard ($(url.host.host, url.host.port, url.url, util.get_day_date_str(timestamp), 'warning_by_minute'), function(err, number_values){
+ assert.ok (!err)
+ assert.equal(number_values,0);
+
+ //we are starting calc avg response time per minute
+ redis.get ($(url.host.host, url.host.port, url.url, util.get_day_date_str(timestamp), 'avg_response_time', hour_time_stamp, 'counter'), function(err, counter){
+ assert.ok (!err)
+ assert.equal (counter, 1)
+
+ //make sure we have a new member in sorted set for this minute with score = avg_response_time
+ redis.zscore ($(url.host.host, url.host.port, url.url, util.get_day_date_str(timestamp), 'avg_response_time'), hour_time_stamp, function(err, avg_response_time){
+ assert.ok(!err)
+ assert.equal (avg_response_time, request_mocked.mocked_response.timeDiff)
+ callback (null,null)
+ });
+ });
+ });
+ });
});
});
})
}
,
- function test_response_ok_expected_status_code_fails (callback){
+ function test_got_another_response_ok (callback){
+ printCurrentTest();
+ var url = {
+ host : {host: 'www.google.com', port:'80', name : 'test'},
+ url : '/',
+ ping_interval: 60,
+ failed_ping_interval:30,
+ method : 'get',
+ expected : {statuscode: 200, contains: 'hola'}
+ }
+
+ timestamp = timestamp + (1000 * 10); //10 seconds later
+
+ var minute_time_stamp = util.get_minute_str(timestamp);
+ var hour_time_stamp = util.get_hour_str (timestamp);
+
+ request_mocked.mocked_response = {error: null, body : 'hola', response : {statusCode: 200}, timeDiff : 500};
+
+ watchmen.query_url(url, redis, request_mocked.processRequest, config, timestamp, function(err, data){
+ assert.ok (!err)
+
+ assert.equal (data.status, 1)
+ assert.equal (data.msg,null)
+ assert.equal (data.next_attempt_secs, 60)
+ assert.ok (data.elapsed_time, 'Elapsed time not found')
+ assert.equal (data.next_attempt_secs, url.ping_interval)
+ assert.ok (!data.down_timestamp, data.down_timestamp)
+
+ redis.hgetall ($(url.host.host, url.host.port, url.url, 'status'), function (err, data){
+ assert.ok (!err)
+
+ assert.ok (!data.lastwarning)
+ assert.ok (data.lastsuccess)
+ assert.equal (data.lasterror)
+
+ assert.equal (data.status,1)
+ assert.equal (data.avg_response_time, 400) //the avg between 300 and 500
+
+ redis.lrange ($(url.host.host, url.host.port, url.url, 'events','error'), 0, 100, function(err, events) {
+ assert.ok (!err)
+ assert.equal (events.length, 0)
+
+ //no errors
+ redis.scard ($(url.host.host, url.host.port, url.url, util.get_day_date_str(timestamp), 'error_by_minute'), function(err, number_values){
+ assert.ok (!err)
+ assert.equal(number_values,0)
+
+ //no warnings
+ redis.scard ($(url.host.host, url.host.port, url.url, util.get_day_date_str(timestamp), 'warning_by_minute'), function(err, number_values){
+ assert.ok (!err)
+ assert.equal(number_values,0)
+
+ //check avg response
+ redis.get ($(url.host.host, url.host.port, url.url, util.get_day_date_str(timestamp), 'avg_response_time', hour_time_stamp, 'counter'), function(err, counter){
+ assert.ok (!err)
+ assert.equal (counter, 2)
+
+ //make sure we have a new member in sorted set for this minute with score = avg_response_time
+ redis.zscore ($(url.host.host, url.host.port, url.url, util.get_day_date_str(timestamp), 'avg_response_time'), hour_time_stamp, function(err, avg_response_time){
+ assert.ok(!err)
+ assert.equal (avg_response_time, 400) //avg of the last 2 calls (300 and 500)
+ callback (null,null)
+ });
+ });
+ });
+ });
+ });
+ });
+ });
+ }
+ ,
+ function test_expected_status_code_fails (callback){
var url = {
host : {host: 'www.google.com', port:'80', name : 'test'},
url : '/',
@@ -114,34 +237,79 @@ var tests= [
expected : {statuscode: 200, contains: 'hola'}
}
- var request_mocked = require ('./lib/request_mocked')
+ timestamp = timestamp + (1000 * 20); //20 seconds later
+
+ var minute_time_stamp = util.get_minute_str(timestamp);
+ var hour_time_stamp = util.get_hour_str (timestamp);
+
request_mocked.mocked_response = {error: null, body : 'hola', response : {statusCode: 301}, timeDiff : 0};
- watchmen.query_url(url, redis, request_mocked.processRequest, config, function(err, data){
+ watchmen.query_url(url, redis, request_mocked.processRequest, config, timestamp, function(err, data){
assert.ok (!err)
assert.equal (data.status, 0)
- assert.ok (data.msg.indexOf('expected status code')>-1);
assert.equal (data.next_attempt_secs, url.failed_ping_interval);
redis.hgetall ($(url.host.host, url.host.port, url.url, 'status'), function (err,data){
+ assert.ok (!err)
assert.ok (!data.lastwarning)
- assert.ok (data.lastok)
+ assert.ok (data.lastsuccess)
assert.ok (data.lasterror)
-
- assert.ok (!err)
+ assert.equal (data.down_timestamp, timestamp);
+ assert.equal (data.avg_response_time, 400) //the avg still 400
assert.equal (data.status, 0)
- redis.lrange ($(url.host.host, url.host.port, url.url, 'events'), 0, 100, function(err, timestamps) {
+
+ redis.lrange ($(url.host.host, url.host.port, url.url, 'events','error'), 0, 100, function(err, events) {
assert.ok (!err)
- assert.equal (timestamps.length, 1)
- redis.hgetall ($(url.host.host, url.host.port, url.url, 'event', timestamps[0]), function (err, event_data){
- assert.ok (event_data.msg.indexOf('FAILED! expected status')>-1, event_data);
- callback (null,null)
+ assert.equal (events.length, 1)
+ var event_obj = JSON.parse(events[0])
+ assert.equal (event_obj.event_type, 'error')
+ assert.ok (event_obj.msg.indexOf('status code')>-1)
+
+ redis.lrange ($(url.host.host, url.host.port, url.url, 'events','success'), 0, 100, function(err, events) {
+ assert.ok (!err)
+ assert.equal (events.length, 0)
+
+ //one error
+ redis.scard ($(url.host.host, url.host.port, url.url, util.get_day_date_str(timestamp), 'error_by_minute'), function(err, number_values){
+ assert.ok (!err)
+ assert.equal(number_values,1);
+
+ //zero warning
+ redis.scard ($(url.host.host, url.host.port, url.url, util.get_day_date_str(timestamp), 'warning_by_minute'), function(err, number_values){
+ assert.ok (!err)
+ assert.equal(number_values,0);
+
+          //check avg response. This hasn't changed since the last request (we got an error)
+ redis.get ($(url.host.host, url.host.port, url.url, util.get_day_date_str(timestamp), 'avg_response_time', hour_time_stamp, 'counter'), function(err, counter){
+ assert.ok (!err)
+ assert.equal (counter, 2) //didn't change
+
+ //make sure we have a new member in sorted set for this minute with score = avg_response_time
+ redis.zscore ($(url.host.host, url.host.port, url.url, util.get_day_date_str(timestamp), 'avg_response_time'), hour_time_stamp, function(err, avg_response_time){
+ assert.ok(!err)
+                  assert.equal (avg_response_time, 400) //this didn't change either
+
+ //reports module should give this exactly same information
+ reports.get_reports_by_host(redis, url.url, url.host.host, url.host.port, function (err, reports){
+ assert.ok(!err);
+ assert.equal(reports.logs_warning.length, 0);
+ assert.equal(reports.logs_critical.length, 1);
+ assert.equal(reports.logs_success.length, 0);
+ assert.equal(reports.report_by_day[0].minutes_with_warnings, 0)
+ assert.equal(reports.report_by_day[0].minutes_with_errors, 1)
+ callback (null,null)
+ })
+
+ });
+ });
+ });
+ });
});
});
});
})
}
,
- function test_response_ok_with_warning (callback){
+  function test_response_ok_with_warning (callback){ //site is back up, got warning
printCurrentTest();
var url = {
host : {host: 'www.google.com', port:'80', name : 'test'},
@@ -153,35 +321,75 @@ var tests= [
expected : {statuscode: 200, contains: 'hola'}
}
- var request_mocked = require ('./lib/request_mocked')
- request_mocked.mocked_response = {error: null, body : 'hola', response : {statusCode: 200}, timeDiff : 500};
+ timestamp = timestamp + (1000 * 25); //25 seconds later (same minute) // 55'
+
+ var minute_time_stamp = util.get_minute_str(timestamp);
+ var hour_time_stamp = util.get_hour_str (timestamp);
+
+ request_mocked.mocked_response = {error: null, body : 'hola', response : {statusCode: 200}, timeDiff : 700};
- watchmen.query_url(url, redis, request_mocked.processRequest, config, function(err, data){
+ watchmen.query_url(url, redis, request_mocked.processRequest, config, timestamp, function(err, data){
assert.ok (!err)
assert.equal (data.status, 1)
- assert.ok (data.msg.indexOf('took too much')>-1, data.msg)
assert.equal (data.next_attempt_secs, url.ping_interval);
- redis.hgetall ($(url.host.host, url.host.port, url.url, 'status'), function (err,data){
- assert.ok (data.lastwarning)
- assert.ok (data.lastok)
- assert.ok (data.lasterror)
+ assert.equal (data.down_time, 25);
+ redis.hgetall ($(url.host.host, url.host.port, url.url, 'status'), function (err, data){
assert.ok (!err)
+ assert.ok (data.lastwarning)
+ assert.ok (data.lastsuccess)
+ assert.ok (data.lasterror)
assert.equal (data.status, 1)
- assert.equal (data.avg_response_time, 400)
- redis.lrange ($(url.host.host, url.host.port, url.url, 'events'), 0, 100, function(err, events) {
+ assert.equal (data.avg_response_time, 500)
+ assert.ok (!data.down_timestamp);
+
+ //site back
+ redis.lrange ($(url.host.host, url.host.port, url.url, 'events','success'), 0, 100, function(err, events) {
assert.ok (!err)
- assert.equal (events.length, 3)
- redis.hgetall ($(url.host.host, url.host.port, url.url, 'event', events[0]), function (err, event_data){
- assert.ok (event_data.msg.indexOf('took too much')>-1, JSON.stringify(event_data));
- callback (null,null)
+ assert.equal (events.length, 1)
+ var event_back = JSON.parse(events[0])
+ assert.ok(event_back.msg.indexOf('site is back! down_time: 25')>-1, event_back.msg)
+
+ redis.lrange ($(url.host.host, url.host.port, url.url, 'events','error'), 0, 100, function(err, events) {
+ assert.ok (!err)
+ assert.equal (events.length, 1)
+
+ redis.lrange ($(url.host.host, url.host.port, url.url, 'events','warning'), 0, 100, function(err, events) {
+ assert.equal (events.length, 1) //we got the event for failing + event for site back + event for warning
+ var event_obj = JSON.parse(events[0])
+ assert.ok (event_obj.msg.indexOf('took too much')>-1, JSON.stringify(events));
+
+ //one error
+ redis.scard ($(url.host.host, url.host.port, url.url, util.get_day_date_str(timestamp), 'error_by_minute'), function(err, number_values){
+ assert.ok (!err)
+ assert.equal(number_values,1);
+ //first warning
+ redis.scard ($(url.host.host, url.host.port, url.url, util.get_day_date_str(timestamp), 'warning_by_minute'), function(err, number_values){
+ assert.ok (!err)
+ assert.equal(number_values,1);
+
+            //check avg response time
+ redis.get ($(url.host.host, url.host.port, url.url, util.get_day_date_str(timestamp), 'avg_response_time', hour_time_stamp, 'counter'), function(err, counter){
+ assert.ok (!err)
+ assert.equal (counter, 3)
+
+ //make sure we have a new member in sorted set for this minute with score = avg_response_time
+ redis.zscore ($(url.host.host, url.host.port, url.url, util.get_day_date_str(timestamp), 'avg_response_time'), hour_time_stamp, function(err, avg_response_time){
+ assert.ok(!err)
+ assert.equal (avg_response_time, 500) //avg of the last 3 calls (300, 500, 700)
+ callback (null,null)
+ });
+ });
+ });
+ });
+ });
});
});
});
})
}
,
- function test_response_ok_expected_text_fails (callback){
+ function test_expected_text_fails (callback){
printCurrentTest();
var url = {
host : {host: 'www.google.com', port:'80', name : 'test'},
@@ -191,82 +399,159 @@ var tests= [
method : 'get',
expected : {statuscode: 200, contains: 'hola'}
}
-
- var request_mocked = require ('./lib/request_mocked')
+
+ timestamp = timestamp + 1000; //1 second later (same minute) // 56'
+
request_mocked.mocked_response = {error: null, body : '', response : {statusCode: 200}, timeDiff : 0};
- watchmen.query_url(url, redis, request_mocked.processRequest, config, function(err, data){
+ watchmen.query_url(url, redis, request_mocked.processRequest, config, timestamp, function(err, data){
assert.ok (!err)
assert.equal (data.status, 0)
- assert.ok (data.msg.indexOf('expected text')>-1);
assert.equal (data.next_attempt_secs, 70);
redis.hgetall ($(url.host.host, url.host.port, url.url, 'status'), function (err,data){
+
+ assert.ok (!err)
assert.ok (data.lastwarning)
- assert.ok (data.lastok)
+ assert.ok (data.lastsuccess)
assert.ok (data.lasterror)
-
- assert.ok (!err)
+ assert.equal (data.down_timestamp, timestamp);
+ assert.equal (data.avg_response_time, 500)
assert.equal (data.status,0)
- redis.lrange ($(url.host.host, url.host.port, url.url, 'events'), 0, 100, function(err, timestamps) {
+
+ redis.lrange ($(url.host.host, url.host.port, url.url, 'events','error'), 0, 100, function(err, timestamps) {
assert.ok (!err)
- assert.equal (timestamps.length, 4)
- redis.hgetall ($(url.host.host, url.host.port, url.url, 'event', timestamps[0]), function (err, event_data){
- if (!err){
- assert.ok (event_data.msg.indexOf('FAILED! expected text')>-1, event_data);
- assert.equal (timestamps.length,4)
- callback (null,null)
- }
- else{
- console.log(err)
- }
+ assert.equal (timestamps.length, 2)
+ redis.lrange ($(url.host.host, url.host.port, url.url, 'events','warning'), 0, 100, function(err, timestamps) {
+ assert.ok (!err)
+ assert.equal (timestamps.length, 1)
+ redis.scard ($(url.host.host, url.host.port, url.url, util.get_day_date_str(timestamp), 'error_by_minute'), function(err, number_values){
+ assert.ok (!err)
+ assert.equal(number_values,1); //same minute, just one error mark per minute
+ redis.scard ($(url.host.host, url.host.port, url.url, util.get_day_date_str(timestamp), 'warning_by_minute'), function(err, number_values){
+ assert.ok (!err)
+ assert.equal(number_values,1);
+ callback (null,null)
+ });
+ });
});
+
});
});
})
}
,
- function test_display_hosts (callback){
-
+ function test_another_error_downtime_check (callback){
printCurrentTest();
-
- function get_urls_from_hosts (hosts){
- var urls = []
- for (var i=0;i<hosts.length;i++){
- for (var u=0;u<hosts[i].urls.length;u++){
- var url = hosts[i].urls[u];
- url.host = hosts[i];
- urls.push (url);
- }
- }
- return urls;
- }
-
- function get_hosts_status (hosts, callback){
- watchmen.get_hosts(redis, hosts, function(err, hosts){
- callback (err, hosts);
- })
+ var url = {
+ host : {host: 'www.google.com', port:'80', name : 'test'},
+ url : '/',
+ ping_interval: 4,
+ failed_ping_interval:30,
+ method : 'get',
+ expected : {statuscode: 200, contains: 'hola'}
}
+ timestamp = timestamp + 1000; //1 second later (same minute) // 57'
- var urls = get_urls_from_hosts(config.hosts);
+ request_mocked.mocked_response = {error: null, body : '', response : {statusCode: 200}, timeDiff : 0};
- function process_mocked_call(url, callback){
- request_mocked.mocked_response = {error: null,
- body : url.expected ? url.expected.contains : "",
- response : {statusCode: url.expected ? url.expected.statuscode :200},
- timeDiff : 0};
- watchmen.query_url(url, redis, request_mocked.processRequest, config, function(err, data){
- callback(err, data);
- });
+ watchmen.query_url(url, redis, request_mocked.processRequest, config, timestamp, function(err, status){
+ assert.ok (!err)
+ assert.equal (status.status, 0)
+ assert.equal (status.next_attempt_secs, 30);
+ assert.equal (status.down_timestamp, (timestamp - 1000));
+
+ reports.get_reports_by_host(redis, url.url, url.host.host, url.host.port, function (err, reports){
+ assert.ok(!err);
+ assert.equal (reports.status.avg_response_time, 500)
+ assert.equal(reports.logs_warning.length, 1);
+ assert.equal(reports.logs_critical.length, 2); //this is the second error in a row. we only record the first one to avoid having an error every minute or so.
+ assert.equal(reports.logs_success.length, 1);
+ assert.equal(reports.report_by_day[0].minutes_with_warnings, 1)
+ assert.equal(reports.report_by_day[0].minutes_with_errors, 1)
+ callback (null,null)
+ })
+ })
+ }
+ ,
+ function test_avg_next_minute (callback){
+ printCurrentTest();
+ var url = {
+ host : {host: 'www.google.com', port:'80', name : 'test'},
+ url : '/',
+ ping_interval: 4,
+ //failed_ping_interval:30,
+ method : 'get',
+ expected : {statuscode: 200, contains: ''}
}
+ timestamp = timestamp + (1000 * 10); //jump to next minute
+ request_mocked.mocked_response = {error: null, body : '', response : {statusCode: 200}, timeDiff : 300};
+
+ watchmen.query_url(url, redis, request_mocked.processRequest, config, timestamp, function(err, status){
+ assert.ok (!err)
+ assert.equal (status.status, 1)
+ assert.equal (status.next_attempt_secs, 4);
+
+ reports.get_reports_by_host(redis, url.url, url.host.host, url.host.port, function (err, reports){
+ assert.ok(!err);
+ assert.equal(reports.logs_warning.length, 1);
+ assert.equal(reports.logs_critical.length, 2);
+ assert.equal(reports.logs_success.length, 2);
+ assert.equal(reports.report_by_day[0].minutes_with_warnings, 1)
+ assert.equal(reports.report_by_day[0].minutes_with_errors, 1)
+ callback (null,null)
+ })
+ })
+ }
+ ,
+ function test_display_hosts (callback){
- async.map (urls, process_mocked_call, function (err, results){
- for (var i=0;i<results.length;i++) {
- assert.ok (results[i].status, JSON.stringify(results[i]));
- assert.ok (results[i].avg_response_time==null);
+ printCurrentTest();
+ watchmen.get_hosts(redis, config.hosts, function(err, hosts){
+ assert.equal (hosts.length, 20);
+
+ for (var i = 0, l = hosts.length; i < l ; i++) {
+ var host = hosts[i]
+ assert.ok(host.name);
+ assert.ok(host.host);
+ assert.ok(host.port);
+ assert.ok(host.urls);
+ for (var u = 0, ul = host.urls.length; u < ul ; u++) {
+ var url = host.urls[u];
+ assert.ok(url.ping_interval, url.ping_interval);
+ };
+ };
+
+ function process_mocked_call(url, callback){
+ request_mocked.mocked_response = {error: null,
+ body : url.expected ? url.expected.contains : "",
+ response : {statusCode: url.expected ? url.expected.statuscode :200},
+ timeDiff : 0};
+ watchmen.query_url(url, redis, request_mocked.processRequest, config, timestamp, function(err, data){
+ callback(err, data);
+ });
}
- callback(null,null);
+
+ function get_urls_from_hosts (hosts){
+ var urls = []
+ for (var i=0;i<hosts.length;i++){
+ for (var u=0;u<hosts[i].urls.length;u++){
+ var url = hosts[i].urls[u];
+ url.host = hosts[i];
+ urls.push (url);
+ }
+ }
+ return urls;
+ }
+
+ var urls = get_urls_from_hosts(config.hosts);
+
+ async.map (urls, process_mocked_call, function (err, results){
+ for (var i=0;i<results.length;i++) {
+ assert.ok (results[i].status, JSON.stringify(results[i]));
+ }
+ callback(null,null);
+ });
});
-
}
,
function finish(callback){
@@ -302,7 +587,7 @@ function series(tests, callback) {
}
series(tests, function(err, results) {
- console.log ("All good!")
- redis.quit();
- process.exit(1);
+ console.log ("All good!");
+ redis.quit();
+ process.exit(0);
})
webserver/app.js (94 lines changed)
@@ -26,16 +26,17 @@ THE SOFTWARE.
var express = require('express');
var app = module.exports = express.createServer();
var config = require('../config')
-
var redis = require("redis").createClient(config.database.port, config.database.host);
redis.select (config.database.db);
var util = require('../lib/util')
var watchmen = require('../lib/watchmen')
+var reports = require('../lib/reports')
app.configure(function(){
app.set('views', __dirname + '/views');
app.set('view engine', 'ejs');
+ app.register('.html', require("ejs")); //register .html extension with ejs view render
app.use(express.bodyParser());
app.use(express.methodOverride());
app.use(app.router);
@@ -50,100 +51,27 @@ app.configure('production', function(){
app.use(express.errorHandler());
});
-function $() { return Array.prototype.slice.call(arguments).join(':') }
-
-function getEvents (url_conf, host, max, callback){
- var events = []
- var key = $(host.host, host.port, url_conf.url, 'events')
- redis.lrange (key, 0, max, function(err, timestamps) {
- var multi = redis.multi()
- for (i=0;i<timestamps.length;i++) {
- key = $(host.host, host.port, url_conf.url, 'event', timestamps[i])
- multi.hgetall (key);
- }
-
- multi.exec(function(err, replies) {
- for (i=0;i<(replies.length-1);i++){
- if (!replies[i]){
- redis.lrem ($(host, port, url, 'events'), 1, timestamps[i]) //event has expired. removing from list.
- }
- else{
- events.push (replies[i]);
- }
- }
- callback (events);
- });
- });
-}
-
//url log detail
app.get('/log', function(req, res){
- var host = req.query ['host'], url = req.query ['url'], port = req.query['port']
- var oHost = null, oUrl=null;
- for (i=0;i<config.hosts.length;i++) {
- for (var u=0;u<config.hosts[i].urls.length;u++){
- if ((config.hosts[i].host==host) && (config.hosts[i].port==port) && (config.hosts[i].urls[u].url==url)){
- oUrl = config.hosts[i].urls[u];
- oHost = config.hosts[i];
- break;
- }
+ var max = 100;
+ reports.get_reports_by_host (redis, req.query['url'], req.query['host'], req.query['port'], function (err, data){
+ if (err){
+ return res.end(err)
}
- }
-
- if (oUrl && oHost){
- var logs_warning = [], logs_critical = [];
-
- getEvents (oUrl, oHost, 100, function (events){
- var key = $(oHost.host, oHost.port, oUrl.url, 'status');
- redis.hget(key, 'status', function(err, data){
- if (!err){
- for (var i=0;i<events.length;i++){
- if (data.event_type == "warning"){
- logs_warning.push (events[i])
- }
- else{
- logs_critical.push (events[i])
- }
- }
- var url_status = '';
- if ((oUrl.enabled == false) || (oHost.enabled == false)){
- url_status="disabled"
- }
- else {
- url_status = (data==1) ? "ok": "error";
- }
- res.render('entry_logs',
- {
- title: oHost.name + ' (' + host + ':' + port+ url + ') status history',
- url_status : url_status,
- logs_warning: logs_warning,
- logs_critical: logs_critical
- });
- }
- else{
- console.log (err)
- res.end ('Error (see console)')
- }
- })
- });
- }
- else{
- res.end ('host/url not found')
- }
+ console.log(data);
+ res.render("entry_logs.html", data);
+ })
});
//list of hosts and url's
app.get('/', function(req, res){
- res.render('index', {title: 'watchmen'});
+ res.render('index.html', {title: 'watchmen'});
});
-
app.get('/getdata', function(req, res){
watchmen.get_hosts(redis, config.hosts, function (err, hosts){
- var headers = {'Content-type' : 'application/json;charset=utf8'}
- res.writeHead(200, headers)
- res.end(JSON.stringify({hosts:hosts, timestamp: util.extraTimeInfo(new Date().getTime())}));
+ return res.json ({hosts:hosts, timestamp: util.extraTimeInfo(new Date().getTime())})
})
});
webserver/views/entry_logs.html (54 lines changed)
@@ -0,0 +1,54 @@
+<a style="float:right" href="/" class="btn primary">back to list</a>
+<h2><%=title%></h2>
+
+<h2 class="<%=url_status%>"><img class="status_icon" src="/images/<%=url_status%>.png" alt="<%=url_status%>" /> <%=url_status%></h2>
+
+<ul>
+<% for (var day in today_avg_info){ %>
+ <% for (var h in today_avg_info[day]){ %>
+ <li>
+ <%=day%> / <%=h%>:
+ <% if (today_avg_info[day][h]){ %>
+ <%=today_avg_info[day][h]%>
+ <%} else {%>
+ 0
+ <%}%>
+ </li>
+ <%}%>
+<%}%>
+</ul>
+
+<script type="text/javascript" src="https://www.google.com/jsapi"></script>
+<% for (var i=0; i<report_by_day.length;i++){ %>
+ <script type="text/javascript">
+ google.load("visualization", "1", {packages:["corechart"]});
+ google.setOnLoadCallback(drawChart);
+ function drawChart() {
+ var data = new google.visualization.DataTable();
+ data.addColumn('string', 'Task');
+ data.addColumn('number', 'Hours per Day');
+ data.addRows([
+ ['Down', <%=Math.round (report_by_day[i].minutes_with_errors) %>],
+ ['Warning', <%=Math.round (report_by_day[i].minutes_with_warnings) %>],
+ ['Up', <%=Math.round (60 * 24 - report_by_day[i].minutes_with_warnings - report_by_day[i].minutes_with_errors) %>]
+ ]);
+
+ var options = {
+ title: 'Site up',
+ colors: ['red','orange','green']
+ };
+
+ var chart = new google.visualization.PieChart(document.getElementById('chart_div<%=i%>'));
+ chart.draw(data, options);
+ }
+ </script>
+
+ <div id="chart_div<%=i%>" style="width: 300px; height: 250px; float:left"></div>
+<%}%>
+
+<div style="clear:both"></div>
+
+<%-partial ('partial/display_logs_list', {logs:logs_critical, log_type:'Critical'})%>
+<%-partial ('partial/display_logs_list', {logs:logs_warning, log_type:'Warning'})%>
+
+<a href="/" class="btn primary">back to list</a>
webserver/views/index.html (86 lines changed)
@@ -0,0 +1,86 @@
+<script id='urlTemplate' type="text/x-jquery-tmpl">
+ <tr class="${status}">
+ <td class="icon">
+ <span>${status}</span>
+ </td>
+ <td class="hosturl">
+ <a target="_blank" href="${host.protocol || 'http'}://${host.host}:${host.port}${url}">
+ ${host.name} ${url}
+ </a>
+ </td>
+
+ <td>${ping_interval}</td>
+
+ <td>${warning_if_takes_more_than}</td>
+
+ <td class="avgresponse">
+ ${avg_response_time}
+ </td>
+ <td>
+
+ </td>
+ <td>
+ <img style="width:100px;height:30px" src="https://chart.googleapis.com/chart?chs=200x125&cht=lc&chco=0077CC&chxt=y&chd=t:27,25,60,31,25,39,25,6,26,28,80,28,27,31,27,29,26,35,70,25&chm=H,FF0000,0,18,1">
+ </td>
+ <td>
+ {{if (lastfailuretime)}}
+ <span class="error">
+ ${lastfailuretime} <br /><time class='timeago' datetime='${lastfailure}'>${lastfailure}</time>
+ </span>
+ {{/if}}
+ </td>
+ <td>
+ {{if (lastwarningtime)}}
+ <span class="warning">
+ ${lastwarningtime} <br /><time class='timeago' datetime='${lastwarning}'>${lastwarning}</time>
+ </span>
+ {{/if}}
+ </td>
+ <td>
+ {{if (lastoktime)}}
+ <span class="ok">
+ ${lastoktime} <br /><time class='timeago' datetime='${lastok}'>${lastok}</time>
+ </span>
+ {{/if}}
+ </td>
+ <td>
+ <a class="btn primary" href="/log?url=${url}&host=${host.host}&port=${host.port}">details</a>
+ </td>
+ </tr>
+</script>
+
+<div class="filter">
+ filter : <input placeholder="filter by host name" data-bind="value: filter, valueUpdate: 'afterkeydown'" />
+</div>
+
+<div class="overview">
+ <span class="up" data-bind="text: hostsUp"></span>
+ +
+ <span class="down" data-bind="text: hostsDown"></span>
+ =
+ <span class="total" data-bind="text: hostsTotal"></span>
+</div>
+
+<table >
+ <thead>
+ <th></th>
+ <th>HOST+URL</th>
+ <th width="50" class = "{sorter: 'digit'}">Ping (sec)</th>
+ <th width="80" class = "{sorter: 'digit'}">Response limit (ms)</th>
+ <th width="150" class = "{sorter: 'digit'}">AVG Response time (ms)</th>
+    <th width="150">UP TIME</th>
+    <th width="150"></th>
+ <th width="150" class="error">Last failure</th>
+ <th width="150" class="warning">Last warning</th>
+ <th width="150" class="ok">Last ok</th>
+ <th width="100"></th>
+ </thead>
+ <tbody id="items" data-bind='template: {name:"urlTemplate", foreach: filteredUrls}'>
+ </tbody>
+</table>
+
+<script>
+$(document).ready(function() {
+ Refresh();
+});
+</script>
webserver/views/layout.html (49 lines changed)
@@ -0,0 +1,49 @@
+<!DOCTYPE html>
+<html>
+ <head>
+ <title>watchmen, http monitor for node.js - <%= title %></title>
+ <link rel='stylesheet' href='/stylesheets/bootstrap.min.css' />
+ <link rel='stylesheet' href='/stylesheets/style.css' />
+ <link rel='stylesheet' href='/stylesheets/tablesorterblue/style.css' />
+ <script src="/js/jquery.min.js" type="text/javascript"></script>
+ <script src="/js/jquery.tmpl.min.js " type="text/javascript"></script>
+ <script src="/js/knockout-2.0.0.js " type="text/javascript"></script>
+ <script src="/js/jquery.timeago.js" type="text/javascript"></script>
+ <script src="/js/jquery.tablesorter.min.js"></script>
+ <script src="/js/watchmen.js"></script>
+ </head>
+ <body>
+ <div class="topbar">
+ <div class="fill">
+ <div class="container-fluid">
+ <h3><a href="/">watchmen <small>node.js http monitor</small></a></h3>
+ <ul class="nav secondary-nav">
+ <li>
+ <a target="_blank" href="/">last update: <span id="last_update" data-bind="html: lastupdate"></span></a>
+ </li>
+ <li class="dropdown">
+ <a target="_blank" href="https://github.com/iloire/WatchMen">Github source code</a>
+ </li>
+ <li>
+ <a target="_blank" href="http://letsnode.com">letsnode.com</a>
+ </li>
+ </ul>
+ </div>
+ </div>
+ </div>
+ <div class="container-fluid">
+ <%- body %>
+ </div>
+ <script type="text/javascript">
+ var _gaq = _gaq || [];
+ _gaq.push(['_setAccount', 'UA-300651-33']);
+ _gaq.push(['_trackPageview']);
+
+ (function() {
+ var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
+ ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
+ var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
+ })();
+ </script>
+ </body>
+</html>
webserver/views/partial/display_logs_list.html (23 lines changed)
@@ -0,0 +1,23 @@
+<h3><%=log_type%></h3>
+<%if (logs.length) {%>
+ <table class="events">
+ <%
+ for (var i=0; i<logs.length;i++){
+ var log = logs[i];
+ %>
+ <tr>
+ <td>
+ <%=new Date(parseFloat(log.timestamp)).toUTCString()%>
+ <time class="timeago" datetime="<%=new Date(parseFloat(log.timestamp)).toISOString()%>"><%=new Date(parseFloat(log.timestamp)).toISOString()%></time>
+ </td>
+ <td class="msg">
+ <span class="<%=log.event_type%>"><%=log.msg%></span>
+ </td>
+ </tr>
+ <%
+ }
+ %>
+ </table>
+<% } else {%>
+ <p>No <%=log_type%> logs found</p>
+<%}%>