Permalink
Browse files

refactoring of the config file

  • Loading branch information...
1 parent fa5f428 commit 284f199284a85413b54f816a590b5963f7f82092 @hbouvier committed Mar 4, 2012
Showing with 230 additions and 55 deletions.
  1. +230 −55 watchdog.js
View
@@ -10,10 +10,151 @@ var util = require('util'),
server = null,
appName = getAppName(process.argv[1]),
configfile = '../config/'+appName+'.json',
- newConfig = {},
config = {},
logstream = fs.createWriteStream(appName + '.log');
+
+///////////////////////////////////////////////////////////////////////////////
+process.on('exit', function (code) {
+ log('EVENT:exit [code:' + code + '] --- SHUTING DOWN NODE-WATCHDOG');
+ shutdown();
+});
+
+process.on('uncaughtException', function (err) {
+ log('EVENT:uncaughtException [' + err + '] --- SHUTING DOWN NODE-WATCHDOG');
+ process.exit(-1);
+});
+
+process.on('SIGHUP', function () {
+ log('EVENT:SIGHUP --- SHUTING DOWN NODE-WATCHDOG');
+ process.exit(-1);
+});
+process.on('SIGINT', function () {
+ log('EVENT:SIGINT (CTRL-C) --- SHUTING DOWN NODE-WATCHDOG');
+ process.exit(-1);
+});
+process.on('SIGQUIT', function () {
+ log('EVENT:SIGQUIT --- SHUTING DOWN NODE-WATCHDOG');
+ process.exit(-1);
+});
+process.on('SIGILL', function () {
+ log('EVENT:SIGILL --- SHUTING DOWN NODE-WATCHDOG');
+ process.exit(-1);
+});
+process.on('SIGTRAP', function () {
+ log('EVENT:SIGTRAP --- SHUTING DOWN NODE-WATCHDOG');
+ process.exit(-1);
+});
+process.on('SIGABRT', function () {
+ log('EVENT:SIGABRT --- SHUTING DOWN NODE-WATCHDOG');
+ process.exit(-1);
+});
+process.on('SIGEMT', function () {
+ log('EVENT:SIGEMT --- SHUTING DOWN NODE-WATCHDOG');
+ process.exit(-1);
+});
+process.on('SIGFPE', function () {
+ log('EVENT:SIGFPE --- SHUTING DOWN NODE-WATCHDOG');
+ process.exit(-1);
+});
+process.on('SIGKILL', function () {
+ log('EVENT:SIGKILL --- SHUTING DOWN NODE-WATCHDOG');
+ process.exit(-1);
+});
+process.on('SIGBUS', function () {
+ log('EVENT:SIGBUS --- SHUTING DOWN NODE-WATCHDOG');
+ process.exit(-1);
+});
+process.on('SIGSEGV', function () {
+ log('EVENT:SIGSEGV --- SHUTING DOWN NODE-WATCHDOG');
+ process.exit(-1);
+});
+process.on('SIGSYS', function () {
+ log('EVENT:SIGSYS --- SHUTING DOWN NODE-WATCHDOG');
+ process.exit(-1);
+});
+process.on('SIGPIPE', function () {
+ log('EVENT:SIGPIPE --- SHUTING DOWN NODE-WATCHDOG');
+ process.exit(-1);
+});
+process.on('SIGALRM', function () {
+ log('EVENT:SIGALRM --- SHUTING DOWN NODE-WATCHDOG');
+ process.exit(-1);
+});
+process.on('SIGTERM', function () {
+ log('EVENT:SIGTERM --- SHUTING DOWN NODE-WATCHDOG');
+ process.exit(-1);
+});
+process.on('SIGURG', function () {
+ log('EVENT:SIGURG --- SHUTING DOWN NODE-WATCHDOG');
+ process.exit(-1);
+});
+/*
+process.on('SIGSTOP', function () {
+ log('EVENT:SIGSTOP --- SHUTING DOWN NODE-WATCHDOG');
+ process.exit(-1);
+});
+process.on('SIGTSTP', function () {
+ log('EVENT:SIGTSTP --- SHUTING DOWN NODE-WATCHDOG');
+ process.exit(-1);
+});
+process.on('SIGCONT', function () {
+ log('EVENT:SIGCONT --- SHUTING DOWN NODE-WATCHDOG');
+ process.exit(-1);
+});
+
+process.on('SIGCHLD', function () {
+ log('EVENT:SIGCHLD --- SHUTING DOWN NODE-WATCHDOG');
+ process.exit(-1);
+});
+*/
+process.on('SIGTTIN', function () {
+ log('EVENT:SIGTTIN --- SHUTING DOWN NODE-WATCHDOG');
+ process.exit(-1);
+});
+process.on('SIGTTOU', function () {
+ log('EVENT:SIGTTOU --- SHUTING DOWN NODE-WATCHDOG');
+ process.exit(-1);
+});
+process.on('SIGIO', function () {
+ log('EVENT:SIGIO --- SHUTING DOWN NODE-WATCHDOG');
+ process.exit(-1);
+});
+process.on('SIGXCPU', function () {
+ log('EVENT:SIGXCPU --- SHUTING DOWN NODE-WATCHDOG');
+ process.exit(-1);
+});
+process.on('SIGXFSZ', function () {
+ log('EVENT:SIGXFSZ --- SHUTING DOWN NODE-WATCHDOG');
+ process.exit(-1);
+});
+process.on('SIGVTALRM', function () {
+ log('EVENT:SIGVTALRM --- SHUTING DOWN NODE-WATCHDOG');
+ process.exit(-1);
+});
+process.on('SIGPROF', function () {
+ log('EVENT:SIGPROF --- SHUTING DOWN NODE-WATCHDOG');
+ process.exit(-1);
+});
+process.on('SIGWINCH', function () {
+ log('EVENT:SIGWINCH --- SHUTING DOWN NODE-WATCHDOG');
+ process.exit(-1);
+});
+process.on('SIGINFO', function () {
+ log('EVENT:SIGINFO --- SHUTING DOWN NODE-WATCHDOG');
+ process.exit(-1);
+});
+process.on('SIGUSR1', function () {
+ log('EVENT:SIGUSR1 --- SHUTING DOWN NODE-WATCHDOG');
+ process.exit(-1);
+});
+process.on('SIGUSR2', function () {
+ log('EVENT:SIGUSR2 --- SHUTING DOWN NODE-WATCHDOG');
+ process.exit(-1);
+});
+
+///////////////////////////////////////////////////////////////////////////////
+
if (process.argv.length === 4 && process.argv[2] == '--config')
configfile = process.argv[3];
@@ -26,26 +167,29 @@ function getAppName(name) {
function log(msg) {
var message = appName + ': ' + msg;
util.log(message);
- logstream.write(message);
+ logstream.write(message + '\n');
}
+///////////////////////////////////////////////////////////////////////////////
+
/**
- * [ {
- * "name":"freeswitch",
- * "description":"A service that restart freeswitch if it dies",
- * "options":{ "cwd": "/usr/local/freeswitch",
- * "env": {
- * "DEBUG":true
- * },
- * "customFds": [-1, -1, -1]
- * },
- * "command":"/usr/local/freeswitch/bin/freeswitch",
- * "arguments":["-waste","-hp"],
- * "logDirectory":"/var/log",
- * "keepalive":true,
- * "state":"enable"
- * }
- * ]
+ * {
+ * "freeswitch":{
+ * "description":"A service that restart freeswitch if it dies",
+ * "options":{ "cwd": "/usr/local/freeswitch",
+ * "env": {
+ * "DEBUG":true
+ * },
+ * "customFds": [-1, -1, -1]
+ * },
+ * "command":"/usr/local/freeswitch/bin/freeswitch",
+ * "arguments":["-waste","-hp"],
+ * "logDirectory":"/var/log",
+ * "keepalive":true,
+ * "depends":["firewall"],
+ * "state":"enable"
+ * }
+ * }
*/
function loadConfig(/*curr, prev*/) {
@@ -67,30 +211,44 @@ log('Reading ' + configfile);
updateProcessesState(fs.readFileSync(configfile));
function updateProcessesState(data) {
- var i;
+ var name,
+ daemon,
+ newConfig;
+
try {
+ // Parse the new configuration
newConfig = JSON.parse(data);
- log('updateProcessesState(' + JSONUtil.stringify(newConfig) + ')');
} catch (exception) {
log('ERROR parsing ' + configfile + ' >>>>> ' + exception + ' <<<<< IGNORING UPDATE\n---------------------------\n' + data);
return;
}
- try {
- for (i = 0 ; i < newConfig.length ; ++i ) {
- log ('updating ' + JSONUtil.stringify(newConfig[i]));
- var daemon = getDaemon(newConfig[i].name);
- if (daemon) {
- newConfig[i].runtime = daemon.runtime;
- }
- updateProcessState(newConfig[i]);
+
+ // Move the runtime info into the NEW configuration
+ for (name in newConfig) {
+ daemon = getDaemon(name);
+ if (daemon) {
+ newConfig[name].runtime = daemon.runtime;
+ config[name].runtime = null;
}
- } catch (e) {
- log('ERROR updating process ' + e + ' <<<<< IGNORING UPDATE');
- newConfig = null;
- return;
}
+
+ // The daemons that are running and that are NOT present in the NEW
+ // configuration will be terminated
+ for (name in config) {
+ daemon = getDaemon(name);
+ if (daemon && daemon.runtime) {
+ terminateProcess(daemon);
+ }
+ }
+
+ // The NEW configuration is now the ACTIVE configuration
config = newConfig;
newConfig = null;
+ for (name in config) {
+ daemon = getDaemon(name);
+ daemon.name = name; // copy the name into the object
+ updateProcessState(daemon);
+ }
}
log('Watching ' + configfile);
@@ -101,13 +259,13 @@ server = http.createServer(function(req, res){
var obj = {'application':appName, 'version' : version};
var path = url.parse(req.url).pathname;
var query = url.parse(req.url, true).query;
- var clientIPAddress = req.headers['x-forwarded-for'] === undefined ? req.connection.remoteAddress : (req.headers['x-forwarded-for'] + '/' +req.connection.remoteAddress);
+ var clientIPAddress = req.headers['x-forwarded-for'] === undefined ?
+ req.connection.remoteAddress :
+ (req.headers['x-forwarded-for'] + '/' +req.connection.remoteAddress);
if (path.indexOf('/shutdown') === 0 && clientIPAddress == '127.0.0.1') {
- log('SHUTING DOWN NODE-WATCHDOG');
- for (var i = config.length ; i >= 0 ; --i) {
- terminateProcess(config[i]);
- }
+ log('HTTP SHUTING DOWN NODE-WATCHDOG');
+ shutdown();
res.writeHead(200, {"Content-Type": "application/json"});
obj.status = 'OK';
obj.message = 'Shutting down';
@@ -155,12 +313,7 @@ server = http.createServer(function(req, res){
function getDaemon(name_or_daemon) {
if (typeof(name_or_daemon) === 'object')
return name_or_daemon;
- for (var i = 0 ; i < config.length ; ++i) {
- if (config[i].name == name_or_daemon) {
- return config[i];
- }
- }
- return null;
+ return config[name_or_daemon];
}
function statusProcess(name_or_daemon) {
var obj = {'application':appName, 'version' : version};
@@ -199,7 +352,7 @@ function startProcess(name_or_daemon) {
var daemon = getDaemon(name_or_daemon);
if (daemon && (daemon.childInfo === undefined || daemon.childInfo === null)) {
- log('startProcess ' + daemon.name);
+ log('Spawning "' + daemon.name + '"');
obj.status = 'OK';
if (daemon.runtime === undefined)
daemon.runtime = {};
@@ -209,34 +362,51 @@ function startProcess(name_or_daemon) {
daemon.runtime.state = 'running';
daemon.childInfo.on('exit', function (code, signal) {
var epoch = new Date().getTime();
+
+ daemon.runtime.exitCode = code === undefined ? 0 : code;
+
+ // 1) Check if the daemon has forked and become unmonitored.
+ if (code !== undefined && code === 0 && epoch - daemon.runtime.epoch < 1000 * 60) {
+ log(daemon.name + 'Exitied after ' + (epoch - daemon.runtime.epoch) / 1000 + ' second(s), with a code of ' + code + ', assuming a successful fork()');
+ daemon.runtime.state = 'forked';
+ return;
+ }
+ // 2) Check if the process constantly crashes and respawns too quickly.
if (epoch - daemon.runtime.epoch < 1000 * 60 * 5) { // less than 5 minutes
- if (daemon.runtime.delay === undefined)
+ // 2.1) it's the first time, we will try in 10 seconds to restart it.
+ if (daemon.runtime.delay === undefined) {
daemon.runtime.delay = 1000 * 10; // 10 seconds;
- else
+ log(daemon.name + ' crashed for the first time, we will restart it in ' + (daemon.runtime.delay /1000) + ' seconds');
+ } else {
+ // 2.2) Ok, it is not the first time, let's double the time we
+ // wait before restarting it.
daemon.runtime.delay *= 2; // avoid process respawning too rapidly
+ log(daemon.name + ' crashed again in less than five minutes, we will restart it in ' + (daemon.runtime.delay /1000) + ' seconds');
+ }
} else {
+ // 3) Ok, it crashed after more than 5 minutes, we will restart it
+ // in one second.
daemon.runtime.delay = 1000 * 1; // 1 second
+ log(daemon.name + ' crashed after more than five minutes, we will restart it in ' + (daemon.runtime.delay /1000) + ' second(s)');
}
+
+ // 4) Let's reset the time stamp at which it was started
daemon.runtime.epoch = epoch;
- daemon.runtime.exitCode = code === undefined ? 0 : code;
- daemon.runtime.state = 'exitted with code ' + daemon.runtime.exitCode;
- log(daemon.name + ' ' + daemon.runtime.state + (signal === undefined ? '' : ' with signal ' + signal));
- daemon.runtime.logstream.write('EXIT: ' + daemon.runtime.exitCode + '\n');
+ daemon.runtime.state = signal === undefined ? 'crash' : 'signal';
+ daemon.runtime.logstream.write('EXIT: ' + daemon.runtime.exitCode);
daemon.runtime.logstream = null;
daemon.childInfo = null;
if (daemon.keepalive && daemon.state == 'enable') {
- // TODO: Should trottle
-
setTimeout(function() {
updateProcessState(daemon);
}, daemon.runtime.delay);
}
});
daemon.childInfo.stdout.on('data', function (data) {
- if (daemon.childInfo && daemon.childInfo.logstream) daemon.runtime.logstream.write('STDOUT: ' + data);
+ if (daemon.childInfo && daemon.childInfo.logstream) daemon.runtime.logstream.write('STDOUT: ' + data + '\n');
});
daemon.childInfo.stderr.on('data', function (data) {
- if (daemon.childInfo && daemon.childInfo.logstream) daemon.runtime.logstream.write('STDERR: ' + data);
+ if (daemon.childInfo && daemon.childInfo.logstream) daemon.runtime.logstream.write('STDERR: ' + data + '\n');
});
}
return obj;
@@ -250,4 +420,9 @@ function updateProcessState(daemon) {
terminateProcess(daemon);
}
}
-
+function shutdown() {
+ // TODO: should look at the depends field
+ for (var name in config) {
+ terminateProcess(name);
+ }
+}

0 comments on commit 284f199

Please sign in to comment.