forked from resque/resque
-
Notifications
You must be signed in to change notification settings - Fork 0
/
worker.rb
543 lines (471 loc) · 15.4 KB
/
worker.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
module Resque
# A Resque Worker processes jobs. On platforms that support fork(2),
# the worker will fork off a child to process each job. This ensures
# a clean slate when beginning the next job and cuts down on gradual
# memory growth as well as low level failures.
#
# It also ensures workers are always listening to signals from you,
# their master, and can react accordingly.
class Worker
# Helpers are mixed in at both the instance and the class level because
# instance methods and the `self.*` finders below rely on the same
# helper calls (presumably `redis`, `encode`, `decode` - confirm in
# Resque::Helpers).
include Resque::Helpers
extend Resque::Helpers
# Whether the worker should log basic info to STDOUT. Takes
# precedence over `very_verbose` when both are set (see `log`).
attr_accessor :verbose
# Whether the worker should log lots of info to STDOUT: enables the
# `log!` messages and, when `verbose` is off, timestamped output.
attr_accessor :very_verbose
# Boolean indicating whether this worker can or can not fork.
# Set to true by `fork` when forking is unavailable (or when $TESTING
# is set); jobs then run inside the worker process itself.
attr_accessor :cant_fork
# Lets `Worker.find` pin the id of a looked-up worker instead of
# having `to_s` compute it from the current host and pid.
attr_writer :to_s
# Reads every id in the :workers Redis set and returns the matching
# worker objects. Ids that `find` cannot resolve are left out.
def self.all
  worker_ids = Array(redis.smembers(:workers))
  worker_ids.map { |worker_id| find(worker_id) }.compact
end
# Returns an array of the worker objects that currently have a
# non-empty value stored under their "worker:<id>" key in Redis,
# i.e. the workers that report to be processing a job.
def self.working
  workers = all
  return [] unless workers.any?

  keys = workers.map { |worker| "worker:#{worker}" }

  busy = begin
    redis.mapped_mget(*keys).reject do |_key, payload|
      payload.nil? || payload.empty?
    end
  rescue Redis::Distributed::CannotDistribute
    # The client refused the multi-key read, so fetch the keys one at
    # a time and keep only the ones that hold a payload.
    found = {}
    keys.each do |key|
      payload = redis.get(key)
      found[key] = payload unless payload.nil? || payload.empty?
    end
    found
  end

  busy.keys.map { |key| find(key.sub("worker:", '')) }.compact
end
# Returns a single worker object for the given string id, or nil when
# the id is not registered. The queue list is recovered from the last
# colon-separated field of the id, and the id itself is pinned on the
# returned object.
def self.find(worker_id)
  return nil unless exists?(worker_id)

  queue_names = worker_id.split(':').last.split(',')
  found = new(*queue_names)
  found.to_s = worker_id
  found
end
# Alias of `find`: looks up a registered worker by its string id and
# returns the worker object, or nil when the id is not registered.
def self.attach(worker_id)
find(worker_id)
end
# Given a string worker id, reports whether that id is a member of the
# :workers set in Redis. The return value is whatever the Redis client
# answers for `sismember`; callers in this file only use it as a
# truthy/falsy condition.
def self.exists?(worker_id)
redis.sismember(:workers, worker_id)
end
# Builds a worker for the given queue names. Each name is converted to
# a String and stripped of surrounding whitespace.
#
# Order matters: the worker checks the first queue for a job, then
# the second, and so on, and starts again from the first queue after
# every processed job. The list therefore defines queue priority.
#
# A single "*" makes the worker operate on all queues in alphabetical
# order, so queues can be added or removed without restarting workers.
#
# Raises NoQueueError (via `validate_queues`) when no queue is given.
def initialize(*queues)
  @queues = queues.map do |name|
    name.to_s.strip
  end
  validate_queues
end
# Ensures the worker has something to work on: raises NoQueueError
# when the queue list is nil or empty, and returns nil otherwise.
#
# You probably never need to call this.
def validate_queues
  return unless @queues.nil? || @queues.empty?

  raise NoQueueError, "Please give each worker at least one queue."
end
# This is the main workhorse method. Called on a Worker instance,
# it begins the worker life cycle:
#
# 1. Startup:   `startup` registers signals, prunes dead workers and
#               registers this worker.
# 2. Work loop: Jobs are reserved from the queues and processed, each
#               in a forked child where fork is available.
# 3. Teardown:  This worker is unregistered (always, via `ensure`).
#
# interval - polling frequency in seconds; anything `Float()` accepts
#            (default 5.0; use a smaller value for a semi-active
#            site). When it is zero the loop ends as soon as no job
#            can be reserved instead of sleeping.
# block    - optional; forwarded to `perform`, which yields the job
#            once it has been processed. Useful for testing.
def work(interval = 5.0, &block)
  interval = Float(interval)
  $0 = "resque: Starting"
  startup

  loop do
    break if shutdown?

    if !paused? && (job = reserve)
      log "got: #{job.inspect}"
      run_hook :before_fork, job
      working_on job

      if @child = fork
        # Parent side: `fork` returned the child's pid.
        srand # Reseeding
        procline "Forked #{@child} at #{Time.now.to_i}"
        # Wait for this job's child specifically. A bare Process.wait
        # returns when *any* child exits, so a process spawned
        # elsewhere (e.g. by a hook) could end the wait early.
        Process.wait(@child)
      else
        # Either we are the forked child or forking is unavailable.
        procline "Processing #{job.queue} since #{Time.now.to_i}"
        perform(job, &block)
        # A forked child ends here; only the non-forking path goes on.
        exit! unless @cant_fork
      end

      done_working
      @child = nil
    else
      break if interval.zero?
      log! "Sleeping for #{interval} seconds"
      procline paused? ? "Paused" : "Waiting for #{@queues.join(',')}"
      sleep interval
    end
  end
ensure
  unregister_worker
end
# DEPRECATED. Processes a single job. If none is given, it will
# try to reserve one. Usually run in the child.
#
# job   - the job to process; when nil, `reserve` is asked for one.
# block - optional; forwarded to `perform`.
#
# Returns nil when there is no job to process, otherwise whatever
# `perform` returns. `done_working` runs only when a job was actually
# obtained (even if processing raises), so an empty poll no longer
# bumps the processed counters.
def process(job = nil, &block)
  return unless job ||= reserve

  begin
    working_on job
    perform(job, &block)
  ensure
    done_working
  end
end
# Processes a given job in the child: runs the :after_fork hook, then
# the job itself.
#
# Anything raised along the way is logged and reported through
# `job.fail`; a failure while reporting is only logged. Either way the
# worker's failure counters are bumped with `failed!`. When a block is
# given it is always yielded the job at the end.
def perform(job)
  run_hook :after_fork, job
  job.perform
rescue Object => error
  log "#{job.inspect} failed: #{error.inspect}"
  begin
    job.fail(error)
  rescue Object => report_error
    log "Received exception when reporting failure: #{report_error.inspect}"
  end
  failed!
else
  log "done: #{job.inspect}"
ensure
  yield job if block_given?
end
# Walks the worker's queues in priority order and returns the first
# job that can be reserved, or nil when every queue is empty.
#
# Any exception raised while reserving is logged (message and
# backtrace) and then re-raised to the caller.
def reserve
  queues.each do |queue_name|
    log! "Checking #{queue_name}"
    found = Resque.reserve(queue_name)
    next unless found

    log! "Found job on #{queue_name}"
    return found
  end

  nil
rescue Exception => error
  log "Error reserving job: #{error.inspect}"
  log error.backtrace.join("\n")
  raise
end
# Returns the list of queues to search for a job, in priority order.
# When the first configured queue is a splat ("*") every queue known
# to Resque is used, sorted alphabetically - handy for picking up
# queues that are added dynamically.
def queues
  return @queues unless @queues.first == "*"

  Resque.queues.sort
end
# Forks when the platform allows it and remembers when it does not.
#
# Returns what `Kernel.fork` returns (the child's pid in the parent,
# nil in the child), or nil without forking when forking is disabled:
# under $TESTING, after an earlier failure, when `Kernel` has no
# `fork` (e.g. IronRuby) or when it raises NotImplementedError. In
# those cases @cant_fork is set to true.
def fork
  @cant_fork = true if $TESTING
  return if @cant_fork

  unless Kernel.respond_to?(:fork)
    @cant_fork = true
    return nil
  end

  begin
    Kernel.fork
  rescue NotImplementedError
    @cant_fork = true
    nil
  end
end
# Runs all the methods needed when a worker begins its lifecycle.
#
# In order: enables GC optimizations, installs the signal handlers,
# prunes stale worker entries, runs the :before_first_fork hook and
# only then registers this worker. Finally STDOUT is switched to
# unbuffered writes.
def startup
enable_gc_optimizations
register_signal_handlers
prune_dead_workers
run_hook :before_first_fork
register_worker
# Fix buffering so we can `rake resque:work > resque.log` and
# get output from the child in there.
$stdout.sync = true
end
# Turns on the copy-on-write friendly GC when the running Ruby offers
# it (REE does); does nothing on other interpreters.
# http://www.rubyenterpriseedition.com/faq.html#adapt_apps_for_cow
def enable_gc_optimizations
  return unless GC.respond_to?(:copy_on_write_friendly=)

  GC.copy_on_write_friendly = true
end
# Registers the various signal handlers a worker responds to.
#
# TERM: Shutdown immediately, stop processing jobs.
#  INT: Shutdown immediately, stop processing jobs.
# QUIT: Shutdown after the current job has finished processing.
# USR1: Kill the forked child immediately, continue processing jobs.
# USR2: Don't process any new jobs
# CONT: Start processing jobs again after a USR2
#
# TERM and INT are always trapped. The remaining signals are trapped
# in the order listed; if the platform rejects one of them with an
# ArgumentError a warning is printed and the rest are skipped.
def register_signal_handlers
  %w[TERM INT].each { |signal| trap(signal) { shutdown! } }

  begin
    [
      ['QUIT', :shutdown],
      ['USR1', :kill_child],
      ['USR2', :pause_processing],
      ['CONT', :unpause_processing]
    ].each do |signal, handler|
      trap(signal) { send(handler) }
    end
  rescue ArgumentError
    warn "Signals QUIT, USR1, USR2, and/or CONT not supported."
  end

  log! "Registered signals"
end
# Flags this worker for shutdown. The work loop checks the flag
# between jobs, so a job that is already running is allowed to finish.
def shutdown
  log('Exiting...')
  @shutdown = true
end
# Kill the child and shutdown immediately: the worker is flagged for
# shutdown first, then the forked child (if any) is killed, so the
# job it was processing is abandoned.
def shutdown!
shutdown
kill_child
end
# Should this worker shutdown as soon as current job is finished?
# Truthy once `shutdown` has set the flag; the flag is never
# initialised in this class, so it reads as nil before that.
def shutdown?
@shutdown
end
# Kills the forked child immediately, without remorse. The job it
# is processing will not be completed.
#
# Does nothing when there is no child. Otherwise `ps` is used to
# check that the child exists: if it does, it is sent KILL (errors
# from the kill are ignored); if it does not, the worker schedules
# its own shutdown.
def kill_child
  return unless @child

  log! "Killing child at #{@child}"
  if system("ps -o pid,state -p #{@child}")
    begin
      Process.kill("KILL", @child)
    rescue StandardError
      nil
    end
  else
    log! "Child #{@child} not found, restarting."
    shutdown
  end
end
# Are we paused? True after `pause_processing`, false after
# `unpause_processing`; nil when neither has been called, since the
# flag is not initialised in this class.
def paused?
@paused
end
# Pauses the worker: no new jobs are picked up until
# `unpause_processing` is called. A job that is currently running is
# left to complete. Installed as the USR2 handler.
def pause_processing
  log("USR2 received; pausing job processing")
  @paused = true
end
# Resumes job processing after a pause. Installed as the CONT
# handler.
def unpause_processing
  log("CONT received; resuming job processing")
  @paused = false
end
# Looks for any workers which should be running on this server
# and, if they're not, removes them from Redis.
#
# This is a form of garbage collection. If a server is killed by a
# hard shutdown, power failure, or something else beyond our
# control, the Resque workers will not die gracefully and therefore
# will leave stale state information in Redis.
#
# A registered worker is pruned when its id names this host but its
# pid is not among the worker pids currently running here. Workers
# registered from other hosts are never touched.
def prune_dead_workers
  registered = Worker.all
  live_pids = worker_pids unless registered.empty?

  registered.each do |worker|
    host, pid = worker.id.split(':')
    next if host != hostname || live_pids.include?(pid)

    log! "Pruning dead worker: #{worker}"
    worker.unregister_worker
  end
end
# Registers ourself as a worker by adding this worker to the :workers
# set in Redis and recording the start time (see `started!`). Useful
# when entering the worker lifecycle on startup.
def register_worker
redis.sadd(:workers, self)
started!
end
# Runs a named hook, passing along any arguments.
#
# name - Symbol name of a hook accessor on Resque (e.g. :before_fork).
# args - arguments handed to the hook unchanged.
#
# Returns nil without logging when no hook is configured, otherwise
# the hook's return value.
def run_hook(name, *args)
  hook = Resque.send(name)
  return unless hook

  msg = "Running #{name} hook"
  # `empty?` rather than `any?`: `any?` looks at the truthiness of the
  # elements, so an argument list made up of nil/false would be treated
  # as absent and never reach the hook.
  msg << " with #{args.inspect}" unless args.empty?
  log msg

  hook.call(*args)
end
# Unregisters ourself as a worker. Useful when shutting down.
#
# A job still recorded as in progress is first reported as failed
# with a DirtyExit. Afterwards the worker is removed from the
# :workers set and its state key, start time and per-worker stats are
# deleted.
def unregister_worker
  in_flight = processing
  if in_flight && !in_flight.empty?
    orphan = Job.new(in_flight['queue'], in_flight['payload'])
    # Attach this worker to the job, even if it is not the precise
    # instance that died.
    orphan.worker = self
    orphan.fail(DirtyExit.new)
  end

  redis.srem(:workers, self)
  redis.del("worker:#{self}")
  redis.del("worker:#{self}:started")

  Stat.clear("processed:#{self}")
  Stat.clear("failed:#{self}")
end
# Given a job, tells Redis we're working on it. Useful for seeing
# what workers are doing and when.
#
# The job is tied to this worker, and its queue, payload and the
# current time are encoded and stored under "worker:<id>".
def working_on(job)
  job.worker = self
  status = {
    :queue   => job.queue,
    :run_at  => Time.now.strftime("%Y/%m/%d %H:%M:%S %Z"),
    :payload => job.payload
  }
  redis.set("worker:#{self}", encode(status))
end
# Called when we are done working: bumps the processed counters (see
# `processed!`) and then deletes the "worker:<id>" key that
# `working_on` wrote, clearing our in-progress state.
def done_working
processed!
redis.del("worker:#{self}")
end
# How many jobs has this worker processed? Reads the per-worker
# "processed:<id>" stat (expected to be an integer count - confirm
# against Stat).
def processed
Stat["processed:#{self}"]
end
# Tell Redis we've processed a job: pushes both the global
# "processed" stat and this worker's own "processed:<id>" stat
# through `Stat <<`.
def processed!
Stat << "processed"
Stat << "processed:#{self}"
end
# How many failed jobs has this worker seen? Reads the per-worker
# "failed:<id>" stat (expected to be an integer count - confirm
# against Stat).
def failed
Stat["failed:#{self}"]
end
# Tells Redis we've failed a job: pushes both the global "failed"
# stat and this worker's own "failed:<id>" stat through `Stat <<`.
def failed!
Stat << "failed"
Stat << "failed:#{self}"
end
# What time did this worker start? Returns the raw value stored under
# "worker:<id>:started" in Redis. `started!` writes `Time.now.to_s`
# there, so callers should expect that String form rather than a
# `Time` instance.
def started
redis.get "worker:#{self}:started"
end
# Tell Redis we've started: stores the current time, as a String
# (`Time.now.to_s`), under "worker:<id>:started".
def started!
redis.set("worker:#{self}:started", Time.now.to_s)
end
# Returns a hash describing the job we're currently processing: the
# decoded value stored under "worker:<id>". An empty hash is returned
# when there is nothing to decode.
def job
  raw = redis.get("worker:#{self}")
  decode(raw) || {}
end
alias_method :processing, :job
# Boolean - true if `state` reports :working, false if not
def working?
state == :working
end
# Boolean - true if `state` reports :idle, false if not
def idle?
state == :idle
end
# Returns a symbol representing the current worker state: :working
# when the "worker:<id>" key exists in Redis, :idle otherwise.
def state
  if redis.exists("worker:#{self}")
    :working
  else
    :idle
  end
end
# Is this worker the same as another worker? Workers are compared by
# their string ids, so `other` may be anything whose `to_s` yields a
# worker id - including a plain String.
def ==(other)
to_s == other.to_s
end
# Short, human-readable form that embeds the worker id.
def inspect
  "#<Worker #{self}>"
end
# The string representation doubles as the id of this worker
# instance: "<hostname>:<pid>:<comma-separated queues>". It is
# computed once and memoized (or preset through the writer) and can
# be handed to `Worker.find`.
def to_s
  @to_s ||= [hostname, Process.pid, @queues.join(',')].join(':')
end
alias_method :id, :to_s
# chomp'd hostname of this machine, obtained by shelling out to the
# `hostname` command on first use and memoized afterwards.
def hostname
@hostname ||= `hostname`.chomp
end
# Returns the Integer PID of the worker, taken from the second
# colon-separated field of its id and memoized.
def pid
  @pid ||= begin
    _host, pid_field = to_s.split(":")
    pid_field.to_i
  end
end
# Returns an Array of string pids of the Resque workers running on
# this machine, using the lookup that matches the platform (Solaris
# has its own). Useful when pruning dead workers on startup.
def worker_pids
  RUBY_PLATFORM =~ /solaris/ ? solaris_worker_pids : linux_worker_pids
end
# Find Resque worker pids on Linux and OS X.
#
# Lists all processes with `ps`, keeps the lines mentioning "resque"
# (but not "resque-web") and returns the first column of each line.
#
# Returns an Array of string pids. Useful when pruning dead workers
# on startup.
def linux_worker_pids
  ps_output = `ps -A -o pid,command | grep "[r]esque" | grep -v "resque-web"`
  ps_output.split("\n").map { |line| line.split(' ').first }
end
# Find Resque worker pids on Solaris.
#
# Every ruby process reported by `ps` is inspected with `pargs`; its
# pid is kept only when the text after the first colon of the pargs
# output equals " resque-<Resque::Version>".
#
# Returns an Array of string pids. Useful when pruning dead workers
# on startup.
def solaris_worker_pids
  ruby_lines = `ps -A -o pid,comm | grep "[r]uby" | grep -v "resque-web"`.split("\n")

  ruby_lines.map do |line|
    real_pid = line.split(' ')[0]
    pargs_command = `pargs -a #{real_pid} 2>/dev/null | grep [r]esque | grep -v "resque-web"`
    real_pid if pargs_command.split(':')[1] == " resque-#{Resque::Version}"
  end.compact
end
# Given a string, sets the procline ($0) and logs it (very verbose
# only). The procline always has the format:
#   resque-VERSION: STRING
def procline(string)
  $0 = "resque-#{Resque::Version}: #{string}"
  log!($0)
end
# Log a message to STDOUT if we are verbose or very_verbose.
#
# With `verbose` the message is printed as "*** message". Otherwise,
# with `very_verbose`, it is prefixed with the current time and this
# process's pid. With neither flag nothing is printed.
def log(message)
  if verbose
    puts "*** #{message}"
  elsif very_verbose
    puts "** [#{Time.now.strftime('%H:%M:%S %Y-%m-%d')}] #{$$}: #{message}"
  end
end
# Logs a message to STDOUT only when very_verbose is set; otherwise
# does nothing.
def log!(message)
  return unless very_verbose

  log(message)
end
end
end