#!/usr/bin/env ruby
# -*-ruby-*-
require 'rubygems'

require 'fileutils'
require 'getoptlong'
require 'set'
require 'socket'

require 'hiredis'
require 'redis'
# ---------------------------------------------------------------------
# Runtime settings.  These are globals; the command-line option parser
# further down in this file overrides most of them.
# ---------------------------------------------------------------------

# Seconds slept between two consecutive checks (-i).
$INTERVAL = 2
# Number of CLOSE_WAIT sockets, present in two consecutive checks, at
# which a process is killed (-l).
$LIMIT = 2

# Log file that log() appends to.
LOGFILE = "/mnt/svoice/log/resreap.log".freeze
# Directory receiving the captured output of the -c command.
CMDLOG_DIR = "/mnt/svoice/log/resreap".freeze
# File holding the pid of this daemon.
PIDFILE = "/var/run/resreap.pid".freeze
#LOGFILE = "resreap.log"
#CMDLOG_DIR = "resreap"
#PIDFILE = "resreap.pid"

# Seconds waited between the first signal and SIGKILL (-w).
$SIGWAIT = 1
# Name of the first signal sent to a victim process (-s).
$SIGNAL = "TERM"
# Pattern the victim's command line has to match (-f); nil matches all.
$FILTER = nil
# Shell command run before a process is signalled (-c); nil disables it.
$COMMAND = nil
# When true nothing is killed, the decision is only logged (-d).
$DRYRUN = false
# Enables the debug() output (-D).
# NOTE(review): $DEBUG is also Ruby's own interpreter debug switch, so
# turning it on changes interpreter behaviour as well (e.g. raised
# exceptions get reported) -- confirm that this is intended.
$DEBUG = false
# [host, port] pairs of foreign endpoints to leave alone (-F).
$FOREIGN_IGNORES = []
# Open-file threshold, either a count or a percentage such as "90%" (-o).
$OPEN_MAX = nil
# Arguments handed to "ps" to select the processes checked by -o (-p).
$PS_FILTER = nil
# [host, port] pairs of Redis servers to subscribe to (-r).
$REDIS_ENDPOINTS = []

# Column indexes of a whitespace-split "netstat -t -e -n -p" output line.
NS_PROTOCOL = 0
NS_RECVQ = 1
NS_SENDQ = 2
NS_LADDR = 3
NS_FADDR = 4
NS_STATE = 5
NS_USER = 6
NS_INODE = 7
NS_PIDPROG = 8

# Name this script was started as, used in the help and version output.
PROGRAM_NAME = File.basename($PROGRAM_NAME).freeze
VERSION_STRING = "0.1".freeze
# Returns the command line of process +pid+ exactly as printed by
# "ps -p PID -o command=" (a trailing newline included), or an empty
# string when ps prints nothing or cannot be started.
#
# ps is started from an argument vector, so no shell ever parses +pid+
# and a malformed value cannot run additional commands.
def commandline(pid)
  IO.popen(["ps", "-p", pid.to_s, "-o", "command="], "r", :err => File::NULL) { |ps|
    ps.read
  }
rescue SystemCallError
  # e.g. ps is not installed; callers only expect a (possibly empty) string
  ""
end
# Appends +msg+ to LOGFILE and echoes it on standard error.
#
# msg  - the message; anything responding to to_s.
# trim - when true (the default) the standard error copy is cut to its
#        first 70 characters followed by "..." if the message is longer
#        than that.  The copy written to LOGFILE is never shortened.
#
# Logging is best effort: a failure to write the log file does not stop
# the echo on standard error, and no exception leaves this method.
def log(msg, trim = true)
  text = msg.to_s
  begin
    File.open(LOGFILE, "a") { |file| file.write("#{text}\n") }
  rescue StandardError
    # the log file is optional; keep going with the stderr copy
  end
  shown = (trim && text.size > 70) ? "#{text[0, 70]}..." : text
  $stderr.write("#{shown}\n")
rescue StandardError
  nil
end
# Writes +msg+ followed by a newline to standard error, but only while
# $DEBUG is set.  Never raises: a failing write is ignored.
def debug(msg)
  $stderr.write("#{msg}\n") if $DEBUG
rescue StandardError
  nil
end
# Splits an endpoint string into [host, port].
#
# s            - "HOST:PORT" or "HOST".  A string with more than one
#                colon is treated as IPv6: an IPv4-mapped address
#                ("::ffff:1.2.3.4:80") yields the embedded IPv4 host,
#                any other form is split at its last colon.
# default_port - port used when the string carries no port or one that
#                does not convert to a non-zero integer (e.g. "*").
#
# Returns a two element array; the port is always an Integer.
def parse_hostport(s, default_port = 6379)
  if s.count(":") > 1
    mapped = /::[fF][fF][fF][fF]:([^:]+):([0-9]+)/.match(s)
    if mapped
      host = mapped[1]
      port = mapped[2].to_i
    else
      # more than one colon was counted above, so rindex cannot be nil
      idx = s.rindex(":")
      host = s[0...idx]
      port = s[(idx + 1)..-1].to_i
    end
  else
    host, port = s.split(":")
    port = port.to_i
  end
  port = default_port if port == 0
  [host, port]
end
# Kills process +pid+: sends $SIGNAL, waits $SIGWAIT seconds and sends
# SIGKILL if the process is still there.  The signalling runs in a
# background shell, so this method does not wait for the outcome.
#
# When $COMMAND is set it is run first with the environment variable
# "pid" set to the victim's pid; its standard output and standard error
# are captured in a fresh file under CMDLOG_DIR.  A non-zero exit status
# of that command spares the process.  With $DRYRUN set nothing is run,
# the decision is only logged.
#
# pid - Integer or numeric String.  Values that do not denote a single
#       process (0, negative, non-numeric) are ignored, because handing
#       them to kill(1) would address whole process groups.
#
# Returns true when the process was signalled (or would have been, in a
# dry run), false when $COMMAND could not be executed, and nil when the
# process does not exist or $COMMAND vetoed the kill.
def xkill(pid)
  target = pid.to_i
  return unless target > 0
  return unless exist?(target)

  if $DRYRUN
    log "process #{target} is not killed, dryrun: #{commandline target}"
    return true
  end

  if $COMMAND
    cmdlog = `mktemp "#{CMDLOG_DIR}/cmdout.#{$$}.XXXX" 2>/dev/null`.chomp
    if cmdlog.empty?
      log "warning: cannot create a command log in #{CMDLOG_DIR}", false
      cmdlog = File::NULL
    end
    # The redirection is set up by Ruby rather than by shell syntax, so
    # it does not depend on which shell /bin/sh happens to be.
    cmdstat = system({ "pid" => target.to_s }, $COMMAND, [:out, :err] => [cmdlog, "w"])
    if cmdstat.nil?
      log "warning: cannot execute the command, #{$COMMAND}", false
      return false
    end
    log "process #{target} command log saved to #{cmdlog}"
    return unless cmdstat
  end

  log "killing process #{target}"
  log " #{commandline(target)}"
  reaper = "kill -#{$SIGNAL} #{target}; sleep #{$SIGWAIT}; " \
           "if kill -0 #{target} 2>/dev/null; then " \
           "echo \"process #{target} didn't exit, force killing\" >> \"#{LOGFILE}\"; " \
           "kill -9 #{target}; " \
           "else " \
           "echo \"process #{target} killed\" >> \"#{LOGFILE}\"; " \
           "fi"
  system("( #{reaper} )&")
  true
end
# Returns the pids printed by "ps $PS_FILTER" as an array of Integers;
# an empty array when $PS_FILTER is not set.
#
# The first whitespace-separated column of every output line is taken as
# the pid.  Lines whose first column is not a positive number (the
# "PID" header, blank lines, diagnostics) are skipped.
#
# $PS_FILTER is passed to the shell verbatim; it is operator-supplied
# configuration (e.g. "-C java"), not untrusted input.
def ps_list
  pids = []
  return pids unless $PS_FILTER
  IO.popen("ps #{$PS_FILTER}") { |pfd|
    pfd.each_line { |line|
      column = line.split.first
      next unless column =~ /\A[0-9]+\z/
      number = column.to_i
      pids << number if number > 0
    }
  }
  pids
end
# Returns the Set of pids, among those reported by ps_list, whose number
# of open files reaches +maxspec+.
#
# maxspec - nil (nothing is checked, an empty set is returned), a count
#           such as "500", or a percentage of the process's own open
#           file limit such as "90%".
#
# Processes whose limit is reported as negative by fd_limit are skipped
# in both modes; in percentage mode a limit of 0 is skipped as well since
# no ratio can be computed.  A process that cannot be inspected (for
# instance because it exited meanwhile) is skipped without affecting the
# remaining ones.  Never raises a StandardError.
def openmax(maxspec)
  set = Set.new
  return set unless maxspec

  spec = maxspec.to_s
  percent = spec.end_with?("%")
  ps_list.each { |pid|
    begin
      limit = fd_limit(pid)
      count = fd_count(pid)
      next if limit < 0
      if percent
        next if limit == 0
        ratio = count * 100.0 / limit.to_f
        debug "openmax: process(#{pid}) #{count}/#{limit} (#{ratio}%) >= #{spec.to_f}%"
        set.add(pid) if ratio >= spec.to_f
      else
        debug "openmax: process(#{pid}) #{count}/#{limit} #{spec.to_i}"
        set.add(pid) if count >= spec.to_i
      end
    rescue StandardError => e
      debug "openmax: exception #{e.message}"
    end
  }
  set
rescue StandardError => e
  # ps_list itself failed; report whatever was collected so far
  debug "openmax: exception #{e.message}"
  set
end
# Returns the number of entries found under /proc/PID/fd, i.e. how many
# file descriptors process +pid+ has open.  Returns 0 when that
# directory does not exist or cannot be listed.
def fd_count(pid)
  Dir.glob("/proc/#{pid}/fd/*").size
end
# Returns the soft "Max open files" limit of process +pid+ as read from
# /proc/PID/limits.
#
# Returns -1 when the limit is "unlimited" or when the file has no
# "Max open files" line, so that callers never mistake a missing value
# for a real limit of zero.
#
# Raises a SystemCallError (e.g. Errno::ENOENT) when the limits file
# cannot be read, for instance because the process is gone.
def fd_limit(pid)
  File.foreach("/proc/#{pid}/limits") { |line|
    found = /^Max open files *([^ ]+)/.match(line)
    next unless found
    # the first column after the name is the soft limit
    return found[1] == "unlimited" ? -1 : found[1].to_i
  }
  -1
end
# Tells whether a process with the given pid exists.
#
# pid - Integer or numeric String.  Anything that does not convert to a
#       positive integer yields false: signal 0 sent to pid 0 or to a
#       negative pid would probe process groups, not a single process.
#
# A process that exists but may not be signalled by this user (EPERM)
# counts as existing.  Never raises a StandardError.
def exist?(pid)
  target = pid.to_i
  return false unless target > 0
  Process.kill(0, target)
  true
rescue Errno::EPERM
  true
rescue StandardError
  false
end
# Adds +connection+ to the entry of +pid+ in +table+, creating the entry
# (with the process's command line) on first use.
def netstat_record(table, pid, connection)
  entry = (table[pid] ||= [Set.new, commandline(pid)])
  entry[0].add(connection)
end

# Collects the TCP connections listed by
# "/bin/netstat -t -e -n -p", grouped by owning process.
#
# state - when given, only connections in that state (e.g.
#         "CLOSE_WAIT") are collected.
#
# Only lines whose protocol column is exactly "tcp" are considered.
# Connections whose foreign endpoint is listed in $FOREIGN_IGNORES are
# left out.  When netstat does not name the owning process ("-" or no
# PID column), the owner of the local port is looked up with fuser.
#
# Returns a Hash mapping each pid (Integer) to a two element array:
# a Set of [local_addr, local_port, foreign_addr, foreign_port] and the
# process's command line.  On any failure the problem is logged and an
# empty Hash is returned, so callers can always treat the result as a
# Hash.
def netstat(state = nil)
  input = {}
  IO.popen('/bin/netstat -t -e -n -p 2>/dev/null') { |fd|
    fd.each_line { |line|
      tokens = line.split
      next if tokens[NS_PROTOCOL] != "tcp"
      next if tokens.size <= NS_STATE
      next if state and tokens[NS_STATE] != state
      debug "port #{tokens[NS_LADDR].split(":")[1]} is in #{tokens[NS_STATE]}. added"

      addr, port = parse_hostport(tokens[NS_LADDR])
      faddr, fport = parse_hostport(tokens[NS_FADDR])

      ignore = $FOREIGN_IGNORES.any? { |host, ignored_port|
        debug "comparing #{host}:#{ignored_port} with #{faddr}:#{fport}"
        matched = (host == faddr and ignored_port == fport)
        debug "ignore foreign endpoint (#{host}:#{ignored_port})" if matched
        matched
      }
      next if ignore

      pidprog = tokens[NS_PIDPROG]
      if pidprog.nil? or pidprog == "-"
        # netstat could not tell the owner; ask fuser about the local port
        if port.to_i == 0
          debug "no port"
          next
        end
        output = `fuser -n tcp #{port.to_i} 2>/dev/null`
        owner = output.split[0]
        unless owner
          debug "fuser error?: #{output}"
          next
        end
        pid = owner.to_i
      else
        pid = pidprog.split("/")[0].to_i
      end
      next unless pid > 0

      netstat_record(input, pid, [addr, port, faddr, fport])
    }
  }
  input
rescue StandardError => e
  log "proc netstat failed: #{e.message}", false
  {}
end
# Subscribes to the "kill" channel of one of the servers listed in
# $REDIS_ENDPOINTS and kills local processes on request.
#
# A request is a message of the form "NAME HOST PORT".  When HOST equals
# this machine's hostname, the process using local TCP port PORT (as
# reported by fuser) is handed to xkill.  The message "exit" ends the
# subscription, after which this method returns.
#
# The endpoints are used round-robin: every (re)connection attempt takes
# the next one.  Any error is logged and followed, after three seconds,
# by another attempt; the retry is deliberately unbounded because this
# is meant to run for the lifetime of the daemon.
def process_kill
  local_hostname = Socket.gethostname
  begin
    redis_host, redis_port = $REDIS_ENDPOINTS.shift
    $REDIS_ENDPOINTS.push([redis_host, redis_port])
    redis = Redis.new(:driver => :hiredis,
                      :host => redis_host, :port => redis_port.to_i)
    redis.subscribe(:kill) { |on|
      on.subscribe { |channel, subscriptions|
        debug "Subscribed to ##{channel} (#{subscriptions} subscriptions)"
      }
      on.message { |channel, message|
        debug "##{channel}: #{message}"
        redis.unsubscribe if message == "exit"

        name, target_host, target_port = message.split
        next if target_port.nil?
        next unless local_hostname == target_host

        # The port arrives over the network and ends up in a shell
        # command, so anything but plain digits is refused.
        unless target_port =~ /\A[0-9]+\z/
          log "warning: ignoring KILL request with invalid port #{target_port.inspect}"
          next
        end

        output = `fuser -n tcp #{target_port} 2>/dev/null`
        owner = output.split[0]
        if owner.nil?
          log "warning: there is no process associated with port #{target_port}"
        else
          log "KILL receives for #{name}/#{target_host} TCP port #{target_port}"
          xkill owner.to_i
        end
      }
      on.unsubscribe { |channel, subscriptions|
        debug "Unsubscribed from ##{channel} (#{subscriptions} subscriptions)"
      }
    }
  rescue StandardError => error
    # covers connection errors of the Redis client as well
    log "warning: #{error}"
    sleep 3
    retry
  end
end
# Prints the usage text on standard output and terminates the program
# with exit status 0.  The defaults shown are the values the settings
# hold at the time of the call.
def help_and_exit
  msg = <<END
Kill processes that have enough CLOSE_WAIT socket(s)
Usage: #{PROGRAM_NAME} [OPTION...]

CLOSE_WAIT related:
    -f PAT          Kill only processes whose command matches PAT
    -F HOST:PORT    Ignore if foreign endpoint matches HOST:PORT.
                    HOST should be in IPv4 numerical notation.
    -l N            If a process has more than or equal to N CLOSE_WAIT
                    socket(s), it will be killed with a signal
                    (default: #{$LIMIT})

Open file related:
    -o OPENMAX      Kill a process if it has OPENMAX or more open files,
                    among the processes selected by the -p option.
                    OPENMAX is either a count or, with a trailing '%',
                    a percentage of the process's open file limit.
    -p FILTER       Select processes from "ps FILTER" shell command.
                    For example, to select all processes with the command
                    name XXXX, you could pass -p "-C XXXX".

Redis related:
    -r HOST[:PORT]  Subscribe to the "kill" channel of the Redis server at
                    HOST:PORT (default port: 6379).  A message of the form
                    "NAME HOST PORT" kills the process that uses TCP port
                    PORT if HOST is the name of this host.
                    This option can be given more than once.

General:
    -i N            Set sleep interval between checks in seconds
                    (default: #{$INTERVAL})
    -c CMD          Before sending a signal, execute CMD in the shell.
                    If CMD returns a non-zero status, the process
                    will not receive any signal.
    -s SIG          Set the signal name (e.g. TERM) that will be sent
                    to a process (default: #{$SIGNAL})
    -w SEC          Set the waiting time in seconds between the signal and
                    SIGKILL (default: #{$SIGWAIT})
    -d              dry run, no kill
    -D              debug mode
    -h              show this help message and exit
    -n, --version   show version information and exit

Note that if a process receives the signal, and the process is alive
for #{$SIGWAIT} second(s), the process will receive SIGKILL.

If you are going to use "-f" option, I recommend to try "-d -D" option
first.  If you get the pid of the culprit process, try to get the
command name by "ps -p PID -o command=" where PID is the pid of that
process.

The signal given with "-s" is sent before SIGKILL.  "-s QUIT" can be
useful since some JVM print stacktrace on SIGQUIT.
END
  puts msg
  exit 0
end
# Prints "<program name> version <version>" on standard output and
# terminates the program with exit status 0.
def version_and_exit
  puts "#{PROGRAM_NAME} version #{VERSION_STRING}"
  exit 0
end
# ---------------------------------------------------------------------
# Command-line parsing.  Every option overrides one of the settings
# defined at the top of this file.
# ---------------------------------------------------------------------
options = GetoptLong.new(
  [ '--limit', '-l', GetoptLong::REQUIRED_ARGUMENT ],
  [ '--pattern', '-f', GetoptLong::REQUIRED_ARGUMENT ],
  [ '--interval', '-i', GetoptLong::REQUIRED_ARGUMENT ],
  [ '--command', '-c', GetoptLong::REQUIRED_ARGUMENT ],
  [ '--signal', '-s', GetoptLong::REQUIRED_ARGUMENT ],
  [ '--sigwait', '-w', GetoptLong::REQUIRED_ARGUMENT ],
  [ '--dryrun', '-d', GetoptLong::NO_ARGUMENT ],
  [ '--debug', '-D', GetoptLong::NO_ARGUMENT ],
  [ '--foreign', '-F', GetoptLong::REQUIRED_ARGUMENT ],
  [ '--openmax', '-o', GetoptLong::REQUIRED_ARGUMENT ],
  [ '--psfilter', '-p', GetoptLong::REQUIRED_ARGUMENT ],
  [ '--redis', '-r', GetoptLong::REQUIRED_ARGUMENT ],
  [ '--help', '-h', GetoptLong::NO_ARGUMENT ],
  # -n is the historical short form; -v is accepted as well
  [ '--version', '-n', '-v', GetoptLong::NO_ARGUMENT ]
)

# Converts the argument of a numeric option, refusing anything that is
# not a non-negative decimal integer instead of silently reading it as 0.
numeric_argument = lambda { |option, text|
  number = begin
    Integer(text, 10)
  rescue ArgumentError, TypeError
    nil
  end
  if number.nil? or number < 0
    abort "#{PROGRAM_NAME}: option #{option} expects a non-negative integer, got #{text.inspect}"
  end
  number
}

begin
  options.each { |opt, arg|
    case opt
    when '--help'
      help_and_exit
    when '--version'
      version_and_exit
    when '--limit'
      $LIMIT = numeric_argument.call(opt, arg)
    when '--pattern'
      $FILTER = arg
    when '--interval'
      $INTERVAL = numeric_argument.call(opt, arg)
    when '--command'
      $COMMAND = arg
    when '--signal'
      $SIGNAL = arg.upcase
    when '--sigwait'
      $SIGWAIT = numeric_argument.call(opt, arg)
    when '--foreign'
      host, port = parse_hostport(arg)
      $FOREIGN_IGNORES << [ host, port ]
    when '--redis'
      host, port = parse_hostport(arg, 6379)
      $REDIS_ENDPOINTS.push([host, port])
    when '--dryrun'
      $DRYRUN = true
    when '--debug'
      $DEBUG = true
    when '--psfilter'
      $PS_FILTER = arg
    when '--openmax'
      $OPEN_MAX = arg
    end
  }
rescue GetoptLong::Error
  # GetoptLong has already printed what is wrong with the command line
  exit 1
end

# Effective settings; printed after parsing so that the position of -D
# on the command line does not matter.
debug "limit: #{$LIMIT}"
debug "pattern: #{$FILTER}"
debug "interval: #{$INTERVAL}"
debug "command: #{$COMMAND}"
debug "signal: #{$SIGNAL}"
debug "signal wait: #{$SIGWAIT}"
debug "dryrun: #{$DRYRUN}"
debug "debug: #{$DEBUG}"
debug "psfilter: #{$PS_FILTER}"
debug "openmax: #{$OPEN_MAX}"
$REDIS_ENDPOINTS.each { |host, port|
  debug "REDIS: adding endpoint #{host}:#{port}"
}
# Remove the pid file when the program terminates.
at_exit {
  begin
    File.delete(PIDFILE) if File.exist?(PIDFILE)
  rescue SystemCallError
    # nothing useful can be done about it while exiting
  end
}

# Directory for the output of the -c command; a failure is only a
# warning because the directory is not needed unless -c is in use.
begin
  FileUtils.mkdir_p(CMDLOG_DIR)
rescue SystemCallError => e
  log "warning: #{e.message}"
end

begin
  File.open(PIDFILE, "w") { |file| file.write("#{$$}\n") }
rescue => e
  log "warning: can't write pidfile: #{e.message}"
end
# ---------------------------------------------------------------------
# Main loop.
#
# Two consecutive snapshots are compared every $INTERVAL seconds:
#  * a process is killed when at least $LIMIT of its CLOSE_WAIT
#    connections are present in both snapshots (and, with -f, its
#    command line matches the pattern);
#  * a process is killed when it reached the open file threshold in
#    both snapshots.
# ---------------------------------------------------------------------
$INPUT1 = netstat "CLOSE_WAIT"
# a failed netstat must not poison the comparison below
$INPUT1 = {} unless $INPUT1.respond_to?(:each_pair)
$RINPUT1 = openmax($OPEN_MAX)
#debug "fdset1: #{$RINPUT1.to_a.join(",")}"
$FILTER = Regexp.new $FILTER if $FILTER

if $REDIS_ENDPOINTS.length > 0
  debug "starting kill subscription thread"
  $THREAD_KILL = Thread.new {
    process_kill
  }
else
  debug "warning: no kill subscription"
end

loop do
  begin
    sleep $INTERVAL
    $INPUT2 = netstat "CLOSE_WAIT"
    $INPUT2 = {} unless $INPUT2.respond_to?(:each_pair)
    $RINPUT2 = openmax($OPEN_MAX)
    #debug "fdset2: #{$RINPUT2.to_a.join(",")}"

    # pids already handed to xkill in this round
    signalled = Set.new
    datecomment = true
    $INPUT2.each_pair { |pid, data|
      previous = $INPUT1[pid]
      next unless previous
      if $FILTER and not (data[1] =~ $FILTER)
        debug "skipped: #{data[1]}"
        next
      end
      # connections that were already in CLOSE_WAIT one round ago
      intsect = data[0] & previous[0]
      next if intsect.size < $LIMIT

      log "process(#{pid}) has #{intsect.size} connections"
      debug "\tPort(s): #{intsect.to_a.join(", ")}"
      if datecomment
        # one UTC timestamp per round, ISO 8601 with seconds
        log "# " + Time.now.utc.strftime("%Y-%m-%dT%H:%M:%S+00:00")
        datecomment = false
      end
      xkill pid
      signalled.add(pid.to_i)
    }

    ($RINPUT1 & $RINPUT2).each { |pid|
      debug "process(#{pid}) open files meet #{$OPEN_MAX}"
      # do not start a second kill for a process handled just above
      next if signalled.include?(pid.to_i)
      xkill pid
      signalled.add(pid.to_i)
    }

    $INPUT1 = $INPUT2
    $RINPUT1 = $RINPUT2
  rescue => e
    log "main loop exception: #{e.message}"
  end
end