real swell config.example.yml -- explanation galore

documentcloud · Aug 28, 2009 · 370bf3e · 370bf3e
1 parent 903a402
commit 370bf3e
Show file tree

Hide file tree

Showing 6 changed files with 54 additions and 17 deletions.
diff --git a/TODO b/TODO
@@ -2,10 +2,12 @@ TODO:
 	* Monitor UI -- see queue of jobs / work units.
 	* Job creation UI. Pick an action, add inputs, set options, go.
 	* Basic integration tests.
-	* Documentation. (document inside of config.yml, too)
 	* Unit tests for lib.
 	* Think about having the completion of a work unit respond with another one,
 	  instead of having to make a second request.
+	* Think about having a failed work unit shuffle off to the end of the queue,
+		instead of going again right away (and probably being handed back to the
+		very same daemon).
 
 DONE:
 	* Right now actions just process. Think about map / process / reduce.
@@ -15,5 +17,5 @@ DONE:
 	* Security. S3 authenticated URLs.
 	* Benchmarking mode -- benchmark every single aspect of a job and report it.
 	* Pare down the code that the workers are loading to the absolute minimum.
-
+	* Documentation. (document inside of config.yml, too)
 
diff --git a/config/config.example.yml b/config/config.example.yml
@@ -1,16 +1,46 @@
-:num_workers:             4
-:default_worker_wait:     1
-:max_worker_wait:         20
-:worker_wait_multiplier:  1.3
-:worker_retry_wait:       5
-:work_unit_retries:       3
-
+# The URL where you're planning on running the server/queue/database.
 :central_server:          http://localhost:9173
+
+# Please provide your AWS credentials for S3 storage of job output.
+:aws_access_key:          [your AWS access key]
+:aws_secret_key:          [your AWS secret access key]
+
+# Choose an S3 bucket to store all CloudCrowd output, and decide whether you'd
+# like to keep all resulting files on S3 private. If so, you'll receive
+# authenticated S3 URLs as job output, valid for 24 hours. If left public,
+# you'll get the straight URLs to the files on S3.
+:s3_bucket:               [your CloudCrowd bucket]
+:use_s3_authentication:   no
+
+# Use HTTP Basic Auth for all requests? (This includes all internal worker
+# requests to the central server.) If yes, specify the login and password that
+# all requests must provide for authentication.
 :use_http_authentication: no
 :login:                   [your login name]
 :password:                [your password]
 
-:use_s3_authentication:   no
-:s3_bucket:               [your CloudCrowd bucket]
-:aws_access_key:          [your AWS access key]
-:aws_secret_key:          [your AWS secret access key]
+# Set the following numbers to tweak the configuration of your worker daemons. 
+# Optimum results will depend on the proportion of Memory/CPU/IO bottlenecks
+# in your actions, the number of central servers you have running, and your
+# desired balance between latency and traffic.
+
+# The number of workers that `crowd workers start` spins up.
+:num_workers:             4
+
+# The minimum number of seconds a worker waits between checking the job queue.
+:min_worker_wait:         1
+
+# The maximum number of seconds a worker waits between checking the job queue.
+:max_worker_wait:         20
+
+# The backoff multiplier the worker uses to slow down the check interval when 
+# there's no work in the queue.
+:worker_wait_multiplier:  1.3
+
+# The number of seconds a worker waits to retry when there's some kind of
+# internal error (e.g. the central server fails to respond).
+:worker_retry_wait:       5
+
+# The number of separate attempts that will be made to process an individual
+# work unit, before marking it as having failed.
+:work_unit_retries:       3
diff --git a/config/database.example.yml b/config/database.example.yml
@@ -1,3 +1,6 @@
+# This is a standard ActiveRecord database.yml file. You can configure it 
+# to use any database that ActiveRecord supports.
+
 :adapter:  mysql
 :encoding: utf8
 :username: root

diff --git a/lib/cloud-crowd.rb b/lib/cloud-crowd.rb
@@ -1,3 +1,5 @@
+# The Grand Central of code loading...
+
 $LOAD_PATH.unshift File.expand_path(File.dirname(__FILE__))
 
 # Common Gems:

diff --git a/lib/cloud_crowd/daemon.rb b/lib/cloud_crowd/daemon.rb
@@ -10,12 +10,12 @@ module CloudCrowd
   # isn't any work to be done, and speeds back up when there is.
   class Daemon
 
-    DEFAULT_WAIT    = CloudCrowd.config[:default_worker_wait]
+    MIN_WAIT        = CloudCrowd.config[:min_worker_wait]
     MAX_WAIT        = CloudCrowd.config[:max_worker_wait]
     WAIT_MULTIPLIER = CloudCrowd.config[:worker_wait_multiplier]
 
     def initialize
-      @wait_time = DEFAULT_WAIT
+      @wait_time = MIN_WAIT
       @worker = CloudCrowd::Worker.new
       Signal.trap('INT',  'EXIT')
       Signal.trap('KILL', 'EXIT')
@@ -32,7 +32,7 @@ def run
         @worker.fetch_work_unit
         if @worker.has_work?
           @worker.run
-          @wait_time = DEFAULT_WAIT
+          @wait_time = MIN_WAIT
           sleep 0.01 # So as to listen for incoming signals.
         else
           @wait_time = [@wait_time * WAIT_MULTIPLIER, MAX_WAIT].min

diff --git a/test/config/config.yml b/test/config/config.yml
@@ -1,5 +1,5 @@
 :num_workers:             4
-:default_worker_wait:     1
+:min_worker_wait:         1
 :max_worker_wait:         20
 :worker_wait_multiplier:  1.3
 :worker_retry_wait:       5