futzed with the S3 store. It now uses an SSL connection to upload files when s3_authentication is on. Removed 'use_' prefix from s3_authentication and http_authentication.
documentcloud · Sep 16, 2009 · 48348a8 · 48348a8
1 parent 0179437
commit 48348a8
Show file tree

Hide file tree

Showing 9 changed files with 39 additions and 40 deletions.
diff --git a/config/config.example.yml b/config/config.example.yml
@@ -1,33 +1,33 @@
 # The URL where you're planning on running the central server/queue/database.
-:central_server:          http://localhost:9173
+:central_server:      http://localhost:9173
 
 # Set the maximum number of workers allowed per-node. Workers only run while 
 # there's work to be done. It's best to set 'max_workers' below the point where 
 # you'd start to swap or peg your CPU (as determined by experiment).
-:max_workers:             5
+:max_workers:         5
 
 # The storage back-end that you'd like to use for intermediate and final results
 # of processing. 's3' and 'filesystem' are supported. 'filesystem' should only
 # be used in development, or on single-machine installations.
-:storage:                 s3
+:storage:             s3
 
 # Please provide your AWS credentials for S3 storage of job output.
-:aws_access_key:          [your AWS access key]
-:aws_secret_key:          [your AWS secret access key]
+:aws_access_key:      [your AWS access key]
+:aws_secret_key:      [your AWS secret access key]
 
 # Choose an S3 bucket to store all CloudCrowd output, and decide if you'd like
 # to keep all resulting files on S3 private. If so, you'll receive authenticated
 # S3 URLs as job output, good for 24 hours. If left public, you'll get the
 # straight URLs to the files on S3.
-:s3_bucket:               [your CloudCrowd bucket]
-:use_s3_authentication:   no
+:s3_bucket:           [your CloudCrowd bucket]
+:s3_authentication:   no
 
 # Use HTTP Basic Auth for all requests? (Includes all internal worker requests 
 # to the central server). If yes, specify the login and password that all 
 # requests must provide for authentication.
-:use_http_authentication: no
-:login:                   [your login name]
-:password:                [your password]
+:http_authentication: no
+:login:               [your login name]
+:password:            [your password]
 
 # By default, CloudCrowd looks for installed actions inside the 'actions'
 # subdirectory of this configuration folder. 'actions_path' allows you to load
@@ -36,4 +36,4 @@
 
 # The number of separate attempts that will be made to process an individual
 # work unit, before marking it as having failed.
-:work_unit_retries:       3
+:work_unit_retries:   3
diff --git a/lib/cloud-crowd.rb b/lib/cloud-crowd.rb
@@ -30,7 +30,7 @@
 
 module CloudCrowd
 
-  # Autoload all the CloudCrowd classes which may not be required.
+  # Autoload all the CloudCrowd internals.
   autoload :Action,       'cloud_crowd/action'
   autoload :AssetStore,   'cloud_crowd/asset_store'
   autoload :Helpers,      'cloud_crowd/helpers'
@@ -45,10 +45,10 @@ module CloudCrowd
   # Root directory of the CloudCrowd gem.
   ROOT        = File.expand_path(File.dirname(__FILE__) + '/..')
 
-  # Keep the version in sync with the gemspec.
+  # Keep this version in sync with the gemspec.
   VERSION     = '0.1.1'
 
-  # A Job is processing if its WorkUnits in the queue to be handled by workers.
+  # A Job is processing if its WorkUnits are in the queue to be handled by nodes.
   PROCESSING  = 1
 
   # A Job has succeeded if all of its WorkUnits have finished successfully.
@@ -66,11 +66,10 @@ module CloudCrowd
   # back together into the final result.
   MERGING     = 5
 
-  # A work unit is considered to be complete if it succeeded or if it failed.
+  # A Job is considered to be complete if it succeeded or if it failed.
   COMPLETE    = [SUCCEEDED, FAILED]
 
-  # A work unit is considered incomplete if it's being processed, split up or 
-  # merged together.
+  # A Job is considered incomplete if it's being processed, split up or merged.
   INCOMPLETE  = [PROCESSING, SPLITTING, MERGING]
 
   # Mapping of statuses to their display strings.
@@ -93,12 +92,12 @@ def configure_database(config_path)
       ActiveRecord::Base.establish_connection(configuration)
     end
 
-    # Get a reference to the central server, including authentication, 
-    # if configured.
+    # Get a reference to the central server, including authentication if 
+    # configured.
     def central_server
       return @central_server if @central_server
       params = [CloudCrowd.config[:central_server]]
-      params += [CloudCrowd.config[:login], CloudCrowd.config[:password]] if CloudCrowd.config[:use_http_authentication]
+      params += [CloudCrowd.config[:login], CloudCrowd.config[:password]] if CloudCrowd.config[:http_authentication]
       @central_server = RestClient::Resource.new(*params)
     end
 
@@ -111,7 +110,7 @@ def display_status(status)
     # CloudCrowd::Actions are requested dynamically by name. Access them through
     # this actions property, which behaves like a hash. At load time, we
     # load all installed Actions and CloudCrowd's default Actions into it.
-    # If you wish to have certain workers be specialized to only handle certain 
+    # If you wish to have certain nodes be specialized to only handle certain 
     # Actions, then install only those into the actions directory.
     def actions
       return @actions if @actions

diff --git a/lib/cloud_crowd/action.rb b/lib/cloud_crowd/action.rb
@@ -38,17 +38,19 @@ def process
 
     # Download a file to the specified path.
     def download(url, path)
-      URI.parse(url) # Sanity check.
       `curl -s "#{url}" > "#{path}"`
+      return path
+      # The previous implementation is below, and, although it would be 
+      # wonderful not to shell out, RestClient wasn't handling URLs with encoded
+      # entities (%20, for example), and doesn't let you download to a given
+      # location. Getting a RestClient patch in would be ideal.
+      #
       # if url.match(FILE_URL)
       #   FileUtils.cp(url.sub(FILE_URL, ''), path)
       # else
-      #   # An alternative would be shelling out: `curl -s "#{url}" > "#{path}"`
-      #   puts url
       #   resp = RestClient::Request.execute(:url => url, :method => :get, :raw_response => true)
       #   FileUtils.mv resp.file.path, path
       # end
-      path
     end
 
     # Takes a local filesystem path, saves the file to S3, and returns the 

diff --git a/lib/cloud_crowd/asset_store/s3_store.rb b/lib/cloud_crowd/asset_store/s3_store.rb
@@ -7,8 +7,14 @@ module S3Store
 
       # Configure authentication and establish a connection to S3, first thing.
       def setup
-        @use_auth = CloudCrowd.config[:use_s3_authentication]
-        establish_s3_connection
+        @use_auth   = CloudCrowd.config[:s3_authentication]
+        bucket_name = CloudCrowd.config[:s3_bucket]
+        key, secret = CloudCrowd.config[:aws_access_key], CloudCrowd.config[:aws_secret_key]
+        protocol    = @use_auth ? 'https' : 'http'
+        port        = @use_auth ? 443 : 80
+        @s3         = RightAws::S3.new(key, secret, :protocol => protocol, :port => port)
+        @bucket     = @s3.bucket(bucket_name)
+        @bucket     = @s3.bucket(bucket_name, true) unless @bucket
       end
 
       # Save a finished file from local storage to S3. Save it publicly unless 
@@ -29,15 +35,6 @@ def cleanup(job)
         @bucket.delete_folder("#{job.action}/job_#{job.id}")
       end
 
-      # Workers, through the course of many WorkUnits, keep around an AssetStore.
-      # Ensure we have a persistent S3 connection after first use.
-      def establish_s3_connection
-        unless @s3 && @bucket
-          params = {:port => 80, :protocol => 'http'}
-          @s3 = RightAws::S3.new(CloudCrowd.config[:aws_access_key], CloudCrowd.config[:aws_secret_key], params)
-          @bucket = @s3.bucket(CloudCrowd.config[:s3_bucket], true)
-        end
-      end
     end
 
   end

diff --git a/lib/cloud_crowd/helpers/authorization.rb b/lib/cloud_crowd/helpers/authorization.rb
@@ -25,7 +25,7 @@ def authorized?
       # turned on, then every request is authenticated, including between 
       # the nodes and the central server.
       def authorize(login, password)
-        return true unless CloudCrowd.config[:use_http_authentication]
+        return true unless CloudCrowd.config[:http_authentication]
         return CloudCrowd.config[:login] == login &&
                CloudCrowd.config[:password] == password
       end

diff --git a/lib/cloud_crowd/models/node_record.rb b/lib/cloud_crowd/models/node_record.rb
@@ -51,7 +51,7 @@ def url
     def node
       return @node if @node
       params = [url]
-      params += [CloudCrowd.config[:login], CloudCrowd.config[:password]] if CloudCrowd.config[:use_http_authentication]
+      params += [CloudCrowd.config[:login], CloudCrowd.config[:password]] if CloudCrowd.config[:http_authentication]
       @node = RestClient::Resource.new(*params)
     end
 

diff --git a/lib/cloud_crowd/node.rb b/lib/cloud_crowd/node.rb
@@ -18,7 +18,7 @@ class Node < Sinatra::Default
 
     # Enabling HTTP Authentication turns it on for all requests.
     before do
-      login_required if CloudCrowd.config[:use_http_authentication]
+      login_required if CloudCrowd.config[:http_authentication]
     end
 
     # To monitor a Node with Monit, God, Nagios, or another tool, you can hit 

diff --git a/lib/cloud_crowd/server.rb b/lib/cloud_crowd/server.rb
@@ -28,7 +28,7 @@ class Server < Sinatra::Default
 
     # Enabling HTTP Authentication turns it on for all requests.
     before do
-      login_required if CloudCrowd.config[:use_http_authentication]
+      login_required if CloudCrowd.config[:http_authentication]
     end
 
     # Render the admin console.

diff --git a/public/js/admin_console.js b/public/js/admin_console.js
@@ -117,6 +117,7 @@ window.Console = {
   },
 
   // Re-render all workers from scratch each time.
+  // This method is desperately in need of Javascript templates...
   renderNodes : function() {
     var header = $('#sidebar_header');
     var nc = this._nodes.length, wc = this._workerCount;