Permalink
Browse files

minor code cleanup

  • Loading branch information...
1 parent 6682409 commit f4bae10250c0c7c3786bf8a82b19cca94babfd8d @cantino cantino committed Feb 11, 2014
Showing with 42 additions and 41 deletions.
  1. +42 −40 app/models/agents/website_agent.rb
  2. +0 −1 spec/models/agents/website_agent_spec.rb
@@ -6,6 +6,11 @@ module Agents
class WebsiteAgent < Agent
cannot_receive_events!
+ default_schedule "every_12h"
+
+ UNIQUENESS_LOOK_BACK = 200
+ UNIQUENESS_FACTOR = 3
+
description <<-MD
The WebsiteAgent scrapes a website, XML document, or JSON feed and creates Events based on the results.
@@ -34,20 +39,15 @@ class WebsiteAgent < Agent
Can be configured to use HTTP basic auth by including the `basic_auth` parameter with `username:password`.
- Set `expected_update_period_in_days` to the maximum amount of time that you'd expect to pass between Events being created by this Agent (only used to set the "working" status).
+ Set `expected_update_period_in_days` to the maximum amount of time that you'd expect to pass between Events being created by this Agent. This is only used to set the "working" status.
- Set `uniqueness_look_back` (defaults to the larger of 200, 3x the number of received events) to limit the number of events checked for uniqueness (typically for performance).
+ Set `uniqueness_look_back` to limit the number of events checked for uniqueness (typically for performance). This defaults to the larger of #{UNIQUENESS_LOOK_BACK} or #{UNIQUENESS_FACTOR}x the number of detected received results.
MD
event_description do
"Events will have the fields you specified. Your options look like:\n\n #{Utils.pretty_print options['extract']}"
end
- default_schedule "every_12h"
-
- UNIQUENESS_LOOK_BACK = 200
- UNIQUENESS_FACTOR = 3
-
# Truthy only when event_created_within? accepts the configured
# 'expected_update_period_in_days' option and recent_error_logs? is falsy.
def working?
event_created_within?(options['expected_update_period_in_days']) && !recent_error_logs?
end
@@ -66,54 +66,44 @@ def default_options
end
# Validates the agent's options, recording each failure with errors.add(:base, ...).
# NOTE(review): this span is a diff listing; lines starting with "-" are the
# removed version and lines starting with "+" the added version of the same check.
# Checks shown:
#   * url and expected_update_period_in_days must both be present;
#   * extract must be present unless extraction_type is "json";
#   * mode, when present, must be "on_change" or "all";
#   * expected_update_period_in_days and uniqueness_look_back, when present,
#     must convert via Integer (removed lines) or satisfy is_positive_integer?
#     (added lines).
def validate_options
- # Check required fields are present
+ # Check for required fields
errors.add(:base, "url and expected_update_period_in_days are required") unless options['expected_update_period_in_days'].present? && options['url'].present?
if !options['extract'].present? && extraction_type != "json"
errors.add(:base, "extract is required for all types except json")
end
- # Check options:
+
+ # Check for optional fields
if options['mode'].present?
- if options['mode'] != "on_change" && options['mode'] != "all"
- errors.add(:base, "mode should be all or on_change")
- end
+ errors.add(:base, "mode must be set to on_change or all") unless %w[on_change all].include?(options['mode'])
end
- # Check integer variables:
+
if options['expected_update_period_in_days'].present?
- begin
- Integer(options['expected_update_period_in_days'])
- rescue
- errors.add(:base, "Invalid expected_update_period_in_days format")
- end
+ errors.add(:base, "Invalid expected_update_period_in_days format") unless is_positive_integer?(options['expected_update_period_in_days'])
end
+
if options['uniqueness_look_back'].present?
- begin
- Integer(options['uniqueness_look_back'])
- rescue
- errors.add(:base, "Invalid uniqueness_look_back format")
- end
+ errors.add(:base, "Invalid uniqueness_look_back format") unless is_positive_integer?(options['uniqueness_look_back'])
end
end
def check
hydra = Typhoeus::Hydra.new
log "Fetching #{options['url']}"
- request_opts = {:followlocation => true}
- if options['basic_auth'].present?
- request_opts[:userpwd] = options['basic_auth']
- end
+ request_opts = { :followlocation => true }
+ request_opts[:userpwd] = options['basic_auth'] if options['basic_auth'].present?
request = Typhoeus::Request.new(options['url'], request_opts)
+
request.on_failure do |response|
error "Failed: #{response.inspect}"
end
+
request.on_success do |response|
doc = parse(response.body)
if extract_full_json?
- old_events = previous_payloads 1
- result = doc
- if store_payload? old_events, result
- log "Storing new result for '#{name}': #{result.inspect}"
- create_event :payload => result
+ if store_payload!(previous_payloads(1), doc)
+ log "Storing new result for '#{name}': #{doc.inspect}"
+ create_event :payload => doc
end
else
output = {}
@@ -152,7 +142,7 @@ def check
end
end
- if store_payload? old_events, result
+ if store_payload!(old_events, result)
log "Storing new parsed result for '#{name}': #{result.inspect}"
create_event :payload => result
end
@@ -165,16 +155,20 @@ def check
private
- def store_payload?(old_events, result)
- if !options['mode']
+ # This method returns true if the result should be stored as a new event.
+ # If mode is set to 'on_change', this method may return false and update an existing
+ # event to expire further in the future.
+ def store_payload!(old_events, result)
+ if !options['mode'].present?
return true
elsif options['mode'].to_s == "all"
return true
elsif options['mode'].to_s == "on_change"
+ result_json = result.to_json
old_events.each do |old_event|
- if old_event.payload.to_json == result.to_json
+ if old_event.payload.to_json == result_json
old_event.expires_at = new_event_expiration_date
- old_event.save
+ old_event.save!
return false
end
end
@@ -187,8 +181,8 @@ def previous_payloads(num_events)
if options['uniqueness_look_back'].present?
look_back = options['uniqueness_look_back'].to_i
else
- # Larger of UNIQUENESS_FACTOR*num_events and UNIQUENESS_LOOK_BACK
- look_back = UNIQUENESS_FACTOR*num_events
+ # Larger of UNIQUENESS_FACTOR * num_events and UNIQUENESS_LOOK_BACK
+ look_back = UNIQUENESS_FACTOR * num_events
if look_back < UNIQUENESS_LOOK_BACK
look_back = UNIQUENESS_LOOK_BACK
end
@@ -197,7 +191,7 @@ def previous_payloads(num_events)
end
# True when the 'extract' option is not present and extraction_type is "json".
# The removed (-) and added (+) lines differ only in the surrounding parentheses.
def extract_full_json?
- (!options['extract'].present? && extraction_type == "json")
+ !options['extract'].present? && extraction_type == "json"
end
def extraction_type
@@ -224,5 +218,13 @@ def parse(data)
raise "Unknown extraction type #{extraction_type}"
end
end
+
# Returns true when Integer(value) succeeds and the result is >= 0; returns
# false when the conversion raises (the bare rescue handles StandardError).
# NOTE(review): despite the name, zero passes this check because the comparison
# is ">= 0" rather than "> 0" -- confirm whether 0 is meant to be accepted for
# expected_update_period_in_days and uniqueness_look_back.
+ def is_positive_integer?(value)
+ begin
+ Integer(value) >= 0
+ rescue
+ false
+ end
+ end
end
end
@@ -256,5 +256,4 @@
end
end
end
-
end

0 comments on commit f4bae10

Please sign in to comment.