Permalink
Switch branches/tags
Nothing to show
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
executable file 561 lines (470 sloc) 13.5 KB
#!/usr/bin/env ruby
# -*-ruby-*-
require 'rubygems'
require 'set'
require 'getoptlong'
require 'hiredis'
require 'redis'
require 'socket'
# ---- Run-time configuration (defaults; most are overridden by the command
# ---- line options parsed further down in this file) ----

# Seconds slept between two consecutive checks in the main loop.
$INTERVAL = 2
# Minimum number of CLOSE_WAIT connections, present in two consecutive
# snapshots, that makes a process a kill candidate.
$LIMIT = 2
# File that log() appends to (also used by the background kill shell script).
LOGFILE = "/mnt/svoice/log/resreap.log"
# Directory receiving the output of the user command run before a kill.
CMDLOG_DIR = "/mnt/svoice/log/resreap"
# File holding this process's pid; removed by the at_exit handler.
PIDFILE = "/var/run/resreap.pid"
#LOGFILE = "resreap.log"
#CMDLOG_DIR = "resreap"
#PIDFILE = "resreap.pid"
# Seconds to wait after sending $SIGNAL before falling back to "kill -9".
$SIGWAIT=1
# Name of the signal sent first (passed to kill(1) as "-NAME").
$SIGNAL = "TERM"
# Optional pattern (turned into a Regexp at startup); only processes whose
# command line matches are killed for CLOSE_WAIT reasons.
$FILTER = nil
# Optional shell command executed before signalling a process.
$COMMAND = nil
# When true, nothing is killed; the decision is only logged.
$DRYRUN = false
# Enables debug() output.
# NOTE(review): $DEBUG is also the Ruby interpreter's own debug flag, so
# setting it to true turns on interpreter-level debug behaviour as well.
$DEBUG = false
# List of [host, port] foreign endpoints whose connections are ignored.
$FOREIGN_IGNORES = []
# Open-file threshold specification handed to openmax(), or nil to disable.
$OPEN_MAX = nil
# Arguments appended to "ps" to select the processes openmax() inspects.
$PS_FILTER = nil
# List of [host, port] redis endpoints used by the kill subscription.
$REDIS_ENDPOINTS = []
# Column indexes into a whitespace-split line of the netstat output
# consumed by netstat().
NS_PROTOCOL = 0
NS_RECVQ = 1
NS_SENDQ = 2
NS_LADDR = 3
NS_FADDR = 4
NS_STATE = 5
NS_USER = 6
NS_INODE = 7
NS_PIDPROG = 8
# Script name shown in the usage and version messages.
PROGRAM_NAME = File.basename($PROGRAM_NAME)
VERSION_STRING = "0.1"
# Look up the command line of a process through ps(1).
#
# pid - process id to query (interpolated into the ps invocation).
#
# Returns whatever ps printed on stdout (trailing newline included).
# ps's stderr is discarded, so a failed lookup yields an empty string.
def commandline(pid)
  %x(ps -p #{pid} -o command= 2>/dev/null)
end
# Record a message in LOGFILE and echo it on stderr.
#
# msg  - text to record.
# trim - when true, the stderr copy of a message longer than 70 characters
#        is cut short and suffixed with "..." (the log file always gets the
#        full text).
#
# Logging is strictly best effort: any failure is silently ignored so that
# a logging problem can never take the daemon down.
def log(msg, trim = true)
  begin
    File.open(LOGFILE, "a") do |io|
      io.write("#{msg}\n")
    end
  rescue
    # The log file may be unwritable; still echo on stderr below.
  end

  shown = (trim && msg.size > 70) ? "#{msg[0..70]}..." : msg
  $stderr.write("#{shown}\n")
rescue
end
# Print a diagnostic line on stderr, but only while $DEBUG is set.
#
# msg - text to print.
#
# Write errors are swallowed; diagnostics must never abort the caller.
def debug(msg)
  return unless $DEBUG
  $stderr.write("#{msg}\n")
rescue
end
# Split an endpoint string into host and port.
#
# s            - "HOST:PORT", "HOST", an IPv4-mapped IPv6 endpoint such as
#                "::ffff:10.0.0.1:80" (reduced to its IPv4 part), or any
#                other string with several colons (split at the LAST colon).
# default_port - port used when the port is missing, empty or not a
#                positive number.
#
# Returns [host, port] where port is always an Integer.
def parse_hostport(s, default_port = 6379)
  if s.count(":") > 1
    # More than one colon: an IPv6-style endpoint.
    mapped = /::[fF][fF][fF][fF]:([^:]+):([0-9]+)/.match(s)
    if mapped
      host, port = mapped[1], mapped[2]
    else
      idx = s.rindex(":")
      host, port = s[0...idx], s[(idx + 1)..-1]
    end
  else
    host, port = s.split(":")
  end

  # Normalise in one place so every branch yields an Integer port
  # (nil.to_i and "".to_i are both 0, which selects the default).
  port = port.to_i
  port = default_port if port == 0
  [host, port]
end
# Kill a process: optionally run the user's $COMMAND first, then send
# $SIGNAL and, $SIGWAIT seconds later, SIGKILL if it is still alive.
#
# pid - id of the process to kill.
#
# In dry-run mode ($DRYRUN) nothing is executed; the decision is only logged.
# When $COMMAND is set it runs in a shell with the variable "pid" exported,
# and its stdout/stderr are saved to a fresh file under CMDLOG_DIR. A
# non-zero exit status of that command cancels the kill.
#
# Returns true when the kill was scheduled (or skipped by dry-run), false
# when $COMMAND could not be executed at all, nil when the process does not
# exist or $COMMAND vetoed the kill.
def xkill(pid)
  return if not exist?(pid)

  if $DRYRUN
    log "process #{pid} is not killed, dryrun: #{commandline pid}"
    return true
  end

  if $COMMAND
    cmdlog = `mktemp #{CMDLOG_DIR}/cmdout.#{$$}.XXXX`.chomp
    if cmdlog.empty?
      # mktemp failed (e.g. CMDLOG_DIR is missing); an empty redirect target
      # would make the shell fail and silently veto every kill.
      log "warning: cannot create a command log under #{CMDLOG_DIR}", false
      cmdlog = "/dev/null"
    end
    # Use the POSIX form '>file 2>&1'. The bash-only '>&file' is rejected by
    # shells such as dash, which made the command fail unconditionally.
    cmdstat = system("( export pid=#{pid}; #{$COMMAND} >\"#{cmdlog}\" 2>&1 )")
    if cmdstat.nil?
      log "warning: cannot execute the command, #{$COMMAND}", false
      return false
    end
    log "process #{pid} command log saved to #{cmdlog}"
    return if not cmdstat
  end

  log "killing process #{pid}"
  log " #{commandline(pid)}"
  # The escalation runs in a background subshell so the caller is not
  # blocked for $SIGWAIT seconds.
  system("( kill -#{$SIGNAL} #{pid}; sleep #{$SIGWAIT}; \
            if kill -0 #{pid} 2>/dev/null; then \
              echo \"process #{pid} didn't exit, force killing\" >> \"#{LOGFILE}\"; \
              kill -9 #{pid}; \
            else \
              echo \"process #{pid} killed\" >> \"#{LOGFILE}\"; \
            fi )& ")
  true
end
# List the pids selected by running "ps $PS_FILTER".
#
# The first whitespace-separated field of every output line is taken as the
# pid; the "PID" header line is skipped.
#
# Returns an Array of Integers, empty when $PS_FILTER is not set.
def ps_list()
  return [] unless $PS_FILTER

  IO.popen("ps #{$PS_FILTER}") do |pipe|
    first_fields = pipe.readlines.map { |row| row.split.first }
    first_fields.reject { |field| field == "PID" }.map { |field| field.to_i }
  end
end
# Find the processes (among those returned by ps_list) whose number of open
# files reaches a threshold.
#
# maxspec - nil/false to disable the check, an absolute count such as "500",
#           or a percentage of the process's own open-file limit such as
#           "80%".
#
# Processes with an unlimited open-file limit (fd_limit < 0) are skipped.
# In percentage mode, processes whose limit could not be determined
# (fd_limit == 0) are skipped too, since no ratio can be computed for them.
# A process that cannot be inspected (for example because it exited in the
# meantime) is skipped without aborting the scan.
#
# Returns a Set of pids; never raises.
def openmax(maxspec)
  set = Set.new
  return set unless maxspec

  spec = maxspec.to_s
  # String#[] returns a one-character String on current Rubies, so test the
  # suffix explicitly instead of comparing against a character code.
  percent = spec.end_with?("%")
  threshold = percent ? spec.to_f : spec.to_i

  ps_list.each do |pid|
    begin
      limit = fd_limit(pid)
      count = fd_count(pid)
      next if limit < 0
      if percent
        next if limit == 0
        ratio = count * 100.0 / limit
        debug "openmax: process(#{pid}) #{count}/#{limit} (#{ratio}%) >= #{threshold}%"
        set.add(pid) if ratio >= threshold
      else
        debug "openmax: process(#{pid}) #{count}/#{limit} #{threshold}"
        set.add(pid) if count >= threshold
      end
    rescue => e
      debug "openmax: process(#{pid}) skipped: #{e.message}"
    end
  end
  set
rescue => e
  debug "openmax: exception #{e.message}"
  set
end
# Count the entries under /proc/<pid>/fd.
#
# pid - process id.
#
# Returns the number of entries (0 when the directory cannot be listed).
def fd_count(pid)
  Dir["/proc/#{pid}/fd/*"].length
end
# Read the "Max open files" value from /proc/<pid>/limits.
#
# pid - process id.
#
# Returns the first value on that line as an Integer, -1 when it reads
# "unlimited", and 0 when the line is absent. Errors raised while opening
# the limits file propagate to the caller.
def fd_limit(pid)
  pattern = /^Max open files *([^ ]+)/
  hit = File.open("/proc/#{pid}/limits", "r") do |io|
    io.readlines.find { |row| row =~ pattern }
  end
  raw = hit ? pattern.match(hit)[1] : 0
  raw == "unlimited" ? -1 : raw.to_i
end
# Tell whether a process with the given pid currently exists.
#
# pid - Integer or numeric String.
#
# Only strictly positive pids are probed: 0 and negative values address
# process groups in kill(2), so a non-numeric string (which converts to 0)
# would otherwise report the caller's own process group as "existing".
# A process that exists but may not be signalled (EPERM) counts as existing.
#
# Returns true or false; never raises.
def exist?(pid)
  target = pid.to_i
  return false if target <= 0

  Process.kill(0, target)
  true
rescue Errno::EPERM
  true
rescue
  false
end
# Resolve the pid owning a netstat row.
#
# pidprog - the "PID/Program name" column, or "-" (or nil) when netstat
#           could not tell; in that case fuser(1) is asked who holds the
#           local TCP port.
# port    - local port of the connection.
#
# Returns the pid as an Integer, or nil when it cannot be determined.
def netstat_owner_pid(pidprog, port)
  if pidprog.nil? or pidprog == "-"
    unless port
      debug "no port"
      return nil
    end
    output = `fuser -n tcp #{port} 2>/dev/null`
    first = output.split[0]
    unless first
      debug "fuser error?: #{output}"
      return nil
    end
    return first.to_i
  end

  pid = pidprog.split("/")[0].to_i
  pid > 0 ? pid : nil
end

# Snapshot the TCP connections reported by netstat, grouped by owning pid.
#
# state - when given, only connections in that TCP state (for example
#         "CLOSE_WAIT") are kept.
#
# Connections whose foreign endpoint matches an entry of $FOREIGN_IGNORES
# are left out.
#
# Returns a Hash mapping pid (Integer) to a two-element Array:
#   [ Set of [local_addr, local_port, foreign_addr, foreign_port],
#     command line of the process ]
# On failure the error is logged and an empty Hash is returned, so callers
# can always treat the result as a Hash.
def netstat(state = nil)
  input = {}
  IO.popen('/bin/netstat -t -e -n -p 2>/dev/null') do |fd|
    fd.readlines.each do |line|
      tokens = line.split
      next if tokens[NS_PROTOCOL] != "tcp"
      next if state and tokens[NS_STATE] != state
      debug "port #{tokens[NS_LADDR].split(":")[1]} is in #{tokens[NS_STATE]}. added"

      addr, port = parse_hostport(tokens[NS_LADDR])
      faddr, fport = parse_hostport(tokens[NS_FADDR])

      ignored = $FOREIGN_IGNORES.any? do |host, ignore_port|
        debug "comparing #{host}:#{ignore_port} with #{faddr}:#{fport}"
        matched = (host == faddr and ignore_port == fport)
        debug "ignore foreign endpoint (#{host}:#{ignore_port})" if matched
        matched
      end
      next if ignored

      pid = netstat_owner_pid(tokens[NS_PIDPROG], port)
      next unless pid

      # The command line is looked up only once per pid.
      input[pid] ||= [ Set.new, commandline(pid) ]
      input[pid][0].add([addr, port, faddr, fport])
    end
  end
  input
rescue => e
  log "proc netstat failed: #{e.message}", false
  {}
end
# Subscribe to the redis "kill" channel and kill local processes on request.
#
# Endpoints are taken from $REDIS_ENDPOINTS in round-robin order: the one
# being used is moved to the back of the list, so a reconnect after a
# failure tries the next endpoint. Any error is logged and the subscription
# is re-established after 3 seconds.
#
# Messages:
#   "exit"            - unsubscribe, which ends this method.
#   "NAME HOST PORT"  - when HOST equals this machine's hostname, kill the
#                       process that fuser(1) reports for TCP port PORT.
# PORT must consist of digits only; anything else is rejected because the
# value is handed to a shell.
def process_kill
  local_hostname = Socket.gethostname
  begin
    host, port = $REDIS_ENDPOINTS.shift
    $REDIS_ENDPOINTS.push([host, port])
    redis = Redis.new(:driver => :hiredis,
                      :host => host, :port => port.to_i)
    redis.subscribe(:kill) do |on|
      on.subscribe { |channel, subscriptions|
        debug "Subscribed to ##{channel} (#{subscriptions} subscriptions)"
      }
      on.message do |channel, message|
        debug "##{channel}: #{message}"
        redis.unsubscribe if message == "exit"

        # Distinct names keep the request's fields from overwriting the
        # endpoint's host/port captured by this block.
        name, target_host, target_port = message.split
        next unless target_port
        next unless local_hostname == target_host

        unless target_port =~ /\A[0-9]+\z/
          log "warning: ignoring kill request with invalid port #{target_port.inspect}"
          next
        end

        output = `fuser -n tcp #{target_port} 2>/dev/null`
        if output.empty?
          log "warning: there is no process associated with port #{target_port}"
        elsif output.split[0]
          pid = output.split[0].to_i
          log "KILL receives for #{name}/#{target_host} TCP port #{target_port}"
          xkill pid
        end
      end
      on.unsubscribe { |channel, subscriptions|
        debug "Unsubscribed from ##{channel} (#{subscriptions} subscriptions)"
      }
    end
  rescue Redis::BaseConnectionError => error
    log "warning: #{error}"
    sleep 3
    retry
  rescue => error
    log "warning: #{error}"
    sleep 3
    retry
  end
end
# Print the usage message on stdout and terminate with exit status 0.
#
# The text documents the options registered with GetoptLong in this file;
# the defaults shown are the current values of the corresponding globals.
def help_and_exit
  msg = <<END
Kill processes that have enough CLOSE_WAIT socket(s)
Usage: #{PROGRAM_NAME} [OPTION...]
CLOSE_WAIT related:
-f PAT Kill only processes whose command matches PAT
-F HOST:PORT Ignore if foreign endpoint matches to HOST:PORT
HOST should be in IPv4 numerical notation.
-l N If a process has more than or equal to N CLOSE_WAIT
socket(s), it will be killed with a signal
(default: #{$LIMIT})
Open file related:
-o OPENMAX Kill a process if it has more than or equal to
OPENMAX open files among processes in -p filter option.
-p FILTER Select processes from "ps FILTER" shell command.
For example, to select all process with the command
name XXXX, you could pass -p "-C XXXX".
Redis related:
-r HOST:PORT Subscribe to the "kill" channel of the redis server at
HOST:PORT (default port: 6379). A message of the form
"NAME HOST PORT" kills the local process that uses TCP
port PORT when HOST is the hostname of this machine.
This option may be given more than once.
General:
-i N Set sleep interval between checks in seconds
(default: #{$INTERVAL})
-c CMD Before sending a signal, execute CMD in the shell,
If this CMD returns non-zero returns, the process
will not receive any signal.
-s SIG Set the signal name (e.g. TERM) that will be send
to a process (default: #{$SIGNAL})
-w SEC Set the waiting time in seconds between the signal and
SIGKILL (default: #{$SIGWAIT})
-d dry run, no kill
-D debug mode
-h show this poor help messages and exit
-n show version information and exit
Note that if a process receives the signal, and the process is alive
for #{$SIGWAIT} second(s), the process will receive SIGKILL.
If you are going to use "-f" option, I recommend to try "-d -D" option
first. If you get the pid of the culprit process, try to get the
command name by "ps -p PID -o command=" where PID is the pid of that
process.
You could choose the signal sent before SIGKILL using '-s' option.
This can be useful since some JVM print stacktrace on SIGQUIT.
END
  puts msg
  exit 0
end
# Print "<program> version <version>" on stdout and terminate with exit
# status 0.
def version_and_exit
  banner = [PROGRAM_NAME, "version", VERSION_STRING].join(" ")
  puts banner
  exit(0)
end
# Command line definition. Each entry lists the canonical long name, its
# short alias(es) and whether an argument is required.
# '--version' accepts both '-v' and the historical '-n'.
options = GetoptLong.new(
  [ '--limit',    '-l', GetoptLong::REQUIRED_ARGUMENT ],
  [ '--pattern',  '-f', GetoptLong::REQUIRED_ARGUMENT ],
  [ '--interval', '-i', GetoptLong::REQUIRED_ARGUMENT ],
  [ '--command',  '-c', GetoptLong::REQUIRED_ARGUMENT ],
  [ '--signal',   '-s', GetoptLong::REQUIRED_ARGUMENT ],
  [ '--sigwait',  '-w', GetoptLong::REQUIRED_ARGUMENT ],
  [ '--dryrun',   '-d', GetoptLong::NO_ARGUMENT ],
  [ '--debug',    '-D', GetoptLong::NO_ARGUMENT ],
  [ '--foreign',  '-F', GetoptLong::REQUIRED_ARGUMENT ],
  [ '--openmax',  '-o', GetoptLong::REQUIRED_ARGUMENT ],
  [ '--psfilter', '-p', GetoptLong::REQUIRED_ARGUMENT ],
  [ '--redis',    '-r', GetoptLong::REQUIRED_ARGUMENT ],
  [ '--help',     '-h', GetoptLong::NO_ARGUMENT ],
  [ '--version',  '-v', '-n', GetoptLong::NO_ARGUMENT ]
)

# Copy every option into the global it configures. GetoptLong always
# reports the canonical (first) name, whichever alias was typed.
options.each { |opt, arg|
  case opt
  when '--help'
    help_and_exit
  when '--version'
    version_and_exit
  when '--limit'
    $LIMIT = arg.to_i
  when '--pattern'
    $FILTER = arg
  when '--interval'
    $INTERVAL = arg.to_i
  when '--command'
    $COMMAND = arg
  when '--signal'
    $SIGNAL = arg.upcase
  when '--sigwait'
    $SIGWAIT = arg.to_i
  when '--foreign'
    host, port = parse_hostport(arg)
    $FOREIGN_IGNORES << [ host, port ]
  when '--redis'
    host, port = parse_hostport(arg, 6379)
    $REDIS_ENDPOINTS.push([host, port])
    # Only printed when -D appeared earlier on the command line.
    debug "REDIS: adding endpoint #{host}:#{port}"
  when '--dryrun'
    $DRYRUN = true
  when '--debug'
    $DEBUG = true
  when '--psfilter'
    $PS_FILTER = arg
  when '--openmax'
    $OPEN_MAX = arg
  end
}
# Dump the effective settings (visible in debug mode only).
[
  [ "limit",       $LIMIT ],
  [ "pattern",     $FILTER ],
  [ "interval",    $INTERVAL ],
  [ "command",     $COMMAND ],
  [ "signal",      $SIGNAL ],
  [ "signal wait", $SIGWAIT ],
  [ "dryrun",      $DRYRUN ],
  [ "debug",       $DEBUG ],
  [ "psfilter",    $PS_FILTER ],
  [ "openmax",     $OPEN_MAX ]
].each { |label, value| debug "#{label}: #{value}" }
# Remove the pidfile when the process exits.
at_exit {
  begin
    File.delete(PIDFILE) if File.exist?(PIDFILE)
  rescue => e
    log "warning: can't remove pidfile: #{e.message}"
  end
}

# Make sure the directory for pre-kill command logs exists.
# system() signals failure through its return value rather than by raising,
# so the result is checked explicitly. The redirect uses the POSIX form
# because '>&file' is rejected by shells such as dash.
unless system("mkdir -p \"#{CMDLOG_DIR}\" >/dev/null 2>&1")
  log "warning: can't create directory #{CMDLOG_DIR}"
end

# Record our pid; failure is reported but not fatal.
begin
  File.open(PIDFILE, "w") { |file| file.write("#{$$}\n") }
rescue => e
  log "warning: can't write pidfile: #{e.message}"
end
# Take the first snapshots; the main loop compares each new snapshot with
# the previous one.
$INPUT1 = netstat("CLOSE_WAIT")
$RINPUT1 = openmax($OPEN_MAX)

# From here on $FILTER is a Regexp (or still nil when no pattern was given).
$FILTER = Regexp.new($FILTER) if $FILTER

# Listen for remote kill requests in the background when at least one redis
# endpoint was configured.
if $REDIS_ENDPOINTS.empty?
  debug "warning: no kill subscription"
else
  debug "starting kill subscription thread"
  $THREAD_KILL = Thread.new { process_kill }
end
# Main loop: every $INTERVAL seconds take a fresh snapshot and kill the
# processes that look stuck in BOTH the previous and the current snapshot.
# An exception inside one pass is logged and the loop carries on.
while true
begin
sleep $INTERVAL
# Current snapshots: CLOSE_WAIT connections per pid, and the pids that
# reach the open-file threshold.
$INPUT2 = netstat "CLOSE_WAIT"
$RINPUT2 = openmax($OPEN_MAX)
#debug "fdset2: #{$RINPUT2.to_a.join(",")}"
# A UTC timestamp line is logged at most once per pass, together with the
# first process that gets killed.
datecomment = true
$INPUT2.each_pair { |pid, data|
# Only processes that were already present in the previous snapshot.
next if not $INPUT1[pid]
# data[1] is the command line recorded by netstat() for this pid.
if $FILTER and not (data[1] =~ $FILTER)
debug "skipped: #{data[1]}"
next
end
# Connections present in both snapshots, i.e. the ones that stayed in
# CLOSE_WAIT for at least one whole interval.
newset = data[0];
oldset = $INPUT1[pid][0];
intsect = newset & oldset
#debug "process(#{pid}) has #{intsect.size} connection"
if (intsect.size >= $LIMIT)
log "process(#{pid}) has #{intsect.size} connections"
debug "\tPort(s): #{intsect.to_a.join(", ")}"
if datecomment
log "# " + `date -u -Iseconds`.chomp
datecomment = false
end
xkill pid
end
}
# Open-file check: kill the pids that reached the threshold in both the
# previous and the current snapshot.
begin
intsect = $RINPUT1 & $RINPUT2
intsect.each { |pid|
debug "process(#{pid}) open files meet #{$OPEN_MAX}"
xkill pid
}
end
# The current snapshots become the reference for the next pass. These
# assignments are skipped when an exception was raised above.
$INPUT1 = $INPUT2
$RINPUT1 = $RINPUT2
rescue => e
log "main loop exception: #{e.message}"
end
end