Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP

Loading…

adding a basic RSS agent #407

Merged
merged 1 commit into master from rss_agent

2 participants

@cantino
Owner

Adding a basic RSS Agent. Appreciate code review!

@cantino cantino referenced this pull request
Closed

Real RSS Agent #11

@coveralls

Coverage Status

Coverage increased (+0.23%) when pulling f4df522 on rss_agent into d0a8446 on master.

@cantino cantino merged commit 5183f3d into master from rss_agent
@cantino cantino deleted the branch
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Commits on Jul 21, 2014
  1. adding a basic RSS agent

    authored
This page is out of date. Refresh to see the latest.
View
1  Gemfile
@@ -72,6 +72,7 @@ gem 'em-http-request', '~> 1.1.2'
gem 'weibo_2', '~> 0.1.4'
gem 'hipchat', '~> 1.2.0'
gem 'xmpp4r', '~> 0.5.6'
+gem 'feed-normalizer'
gem 'slack-notifier', '~> 0.5.0'
gem 'therubyracer', '~> 0.12.1'
View
6 Gemfile.lock
@@ -116,6 +116,9 @@ GEM
multipart-post (>= 1.2, < 3)
faraday_middleware (0.9.1)
faraday (>= 0.7.4, < 0.10)
+ feed-normalizer (1.5.2)
+ hpricot (>= 0.6)
+ simple-rss (>= 1.1)
ffi (1.9.3)
forecast_io (2.0.0)
faraday
@@ -144,6 +147,7 @@ GEM
hike (1.2.3)
hipchat (1.2.0)
httparty
+ hpricot (0.8.6)
http (0.5.1)
http_parser.rb
http_parser.rb (0.6.0)
@@ -265,6 +269,7 @@ GEM
faraday (>= 0.9.0.rc5)
jwt (>= 0.1.5)
multi_json (>= 1.0.0)
+ simple-rss (1.3.1)
simple_oauth (0.2.0)
simplecov (0.8.2)
docile (~> 1.1.0)
@@ -359,6 +364,7 @@ DEPENDENCIES
em-http-request (~> 1.1.2)
faraday (~> 0.9.0)
faraday_middleware
+ feed-normalizer
forecast_io (~> 2.0.0)
foreman (~> 0.63.0)
geokit (~> 1.8.4)
View
61 app/concerns/web_request_concern.rb
@@ -0,0 +1,61 @@
+module WebRequestConcern
+ extend ActiveSupport::Concern
+
+ def validate_web_request_options!
+ if options['user_agent'].present?
+ errors.add(:base, "user_agent must be a string") unless options['user_agent'].is_a?(String)
+ end
+
+ unless headers(options['headers']).is_a?(Hash)
+ errors.add(:base, "if provided, headers must be a hash")
+ end
+
+ begin
+ basic_auth_credentials(options['basic_auth'])
+ rescue ArgumentError => e
+ errors.add(:base, e.message)
+ end
+ end
+
+ def faraday
+ @faraday ||= Faraday.new { |builder|
+ builder.headers = headers if headers.length > 0
+
+ if (user_agent = interpolated['user_agent']).present?
+ builder.headers[:user_agent] = user_agent
+ end
+
+ builder.use FaradayMiddleware::FollowRedirects
+ builder.request :url_encoded
+ if userinfo = basic_auth_credentials
+ builder.request :basic_auth, *userinfo
+ end
+
+ case backend = faraday_backend
+ when :typhoeus
+ require 'typhoeus/adapters/faraday'
+ end
+ builder.adapter backend
+ }
+ end
+
+ def headers(value = interpolated['headers'])
+ value.presence || {}
+ end
+
+ def basic_auth_credentials(value = interpolated['basic_auth'])
+ case value
+ when nil, ''
+ return nil
+ when Array
+ return value if value.size == 2
+ when /:/
+ return value.split(/:/, 2)
+ end
+ raise ArgumentError.new("bad value for basic_auth: #{value.inspect}")
+ end
+
+ def faraday_backend
+ ENV.fetch('FARADAY_HTTP_BACKEND', 'typhoeus').to_sym
+ end
+end
View
89 app/models/agents/rss_agent.rb
@@ -0,0 +1,89 @@
+require 'rss'
+require 'feed-normalizer'
+
+module Agents
+ class RssAgent < Agent
+ include WebRequestConcern
+
+ cannot_receive_events!
+ default_schedule "every_1d"
+
+ description do
+ <<-MD
+ This Agent consumes RSS feeds and emits events when they change.
+
+ (If you want to *output* an RSS feed, use the DataOutputAgent. Also, you can technically parse RSS and XML feeds
+ with the WebsiteAgent as well. See [this example](https://github.com/cantino/huginn/wiki/Agent-configuration-examples#itunes-trailers).)
+
+ Options:
+
+ * `url` - The URL of the RSS feed.
+ * `clean` - Attempt to use [feed-normalizer](https://github.com/aasmith/feed-normalizer)'s' `clean!` method to cleanup HTML in the feed. Set to `true` to use.
+ * `expected_update_period_in_days` - How often you expect this RSS feed to change. If more than this amount of time passes without an update, the Agent will mark itself as not working.
+ MD
+ end
+
+ def default_options
+ {
+ 'expected_update_period_in_days' => "5",
+ 'clean' => 'false',
+ 'url' => "https://github.com/cantino/huginn/commits/master.atom"
+ }
+ end
+
+ def working?
+ event_created_within?((interpolated['expected_update_period_in_days'].presence || 10).to_i) && !recent_error_logs?
+ end
+
+ def validate_options
+ errors.add(:base, "url is required") unless options['url'].present?
+
+ unless options['expected_update_period_in_days'].present? && options['expected_update_period_in_days'].to_i > 0
+ errors.add(:base, "Please provide 'expected_update_period_in_days' to indicate how many days can pass without an update before this Agent is considered to not be working")
+ end
+
+ validate_web_request_options!
+ end
+
+ def check
+ response = faraday.get(interpolated['url'])
+ if response.success?
+ feed = FeedNormalizer::FeedNormalizer.parse(response.body)
+ feed.clean! if interpolated['clean'] == 'true'
+ created_event_count = 0
+ feed.entries.each do |entry|
+ if check_and_track(entry.id)
+ created_event_count += 1
+ create_event(:payload => {
+ :id => entry.id,
+ :date_published => entry.date_published,
+ :last_updated => entry.last_updated,
+ :urls => entry.urls,
+ :description => entry.description,
+ :content => entry.content,
+ :title => entry.title,
+ :authors => entry.authors,
+ :categories => entry.categories
+ })
+ end
+ end
+ log "Fetched #{interpolated['url']} and created #{created_event_count} event(s)."
+ else
+ error "Failed to fetch #{interpolated['url']}: #{response.inspect}"
+ end
+ end
+
+ protected
+
+ def check_and_track(entry_id)
+ memory['seen_ids'] ||= []
+ if memory['seen_ids'].include?(entry_id)
+ false
+ else
+ memory['seen_ids'].unshift entry_id
+ memory['seen_ids'].pop if memory['seen_ids'].length > 500
+ true
+ end
+ end
+ end
+end
View
57 app/models/agents/website_agent.rb
@@ -5,6 +5,7 @@
module Agents
class WebsiteAgent < Agent
+ include WebRequestConcern
default_schedule "every_12h"
@@ -109,19 +110,7 @@ def validate_options
end
end
- if options['user_agent'].present?
- errors.add(:base, "user_agent must be a string") unless options['user_agent'].is_a?(String)
- end
-
- unless headers.is_a?(Hash)
- errors.add(:base, "if provided, headers must be a hash")
- end
-
- begin
- basic_auth_credentials()
- rescue => e
- errors.add(:base, e.message)
- end
+ validate_web_request_options!
end
def check
@@ -291,47 +280,5 @@ def is_positive_integer?(value)
false
end
end
-
- def faraday
- @faraday ||= Faraday.new { |builder|
- builder.headers = headers if headers.length > 0
-
- if (user_agent = interpolated['user_agent']).present?
- builder.headers[:user_agent] = user_agent
- end
-
- builder.use FaradayMiddleware::FollowRedirects
- builder.request :url_encoded
- if userinfo = basic_auth_credentials()
- builder.request :basic_auth, *userinfo
- end
-
- case backend = faraday_backend
- when :typhoeus
- require 'typhoeus/adapters/faraday'
- end
- builder.adapter backend
- }
- end
-
- def faraday_backend
- ENV.fetch('FARADAY_HTTP_BACKEND', 'typhoeus').to_sym
- end
-
- def basic_auth_credentials
- case value = interpolated['basic_auth']
- when nil, ''
- return nil
- when Array
- return value if value.size == 2
- when /:/
- return value.split(/:/, 2)
- end
- raise "bad value for basic_auth: #{value.inspect}"
- end
-
- def headers
- interpolated['headers'].presence || {}
- end
end
end
View
356 spec/data_fixtures/github_rss.atom
@@ -0,0 +1,356 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<feed xmlns="http://www.w3.org/2005/Atom" xmlns:media="http://search.yahoo.com/mrss/" xml:lang="en-US">
+ <id>tag:github.com,2008:/cantino/huginn/commits/master</id>
+ <link type="text/html" rel="alternate" href="https://github.com/cantino/huginn/commits/master"/>
+ <link type="application/atom+xml" rel="self" href="https://github.com/cantino/huginn/commits/master.atom"/>
+ <title>Recent Commits to huginn:master</title>
+ <updated>2014-07-16T22:26:22-07:00</updated>
+ <entry>
+ <id>tag:github.com,2008:Grit::Commit/d0a844662846cf3c83b94c637c1803f03db5a5b0</id>
+ <link type="text/html" rel="alternate" href="https://github.com/cantino/huginn/commit/d0a844662846cf3c83b94c637c1803f03db5a5b0"/>
+ <title>
+ Merge pull request #402 from albertsun/safer-liquid-migration
+ </title>
+ <updated>2014-07-16T22:26:22-07:00</updated>
+ <media:thumbnail height="30" width="30" url="https://avatars3.githubusercontent.com/u/83835?s=30"/>
+ <author>
+ <name>cantino</name>
+ <uri>https://github.com/cantino</uri>
+ </author>
+ <content type="html">
+ &lt;pre style='white-space:pre-wrap;width:81ex'>Merge pull request #402 from albertsun/safer-liquid-migration
+
+Inline models into migration&lt;/pre>
+ </content>
+ </entry>
+ <entry>
+ <id>tag:github.com,2008:Grit::Commit/4a433806eeace44f1e39f02ac61cefdadf3597e2</id>
+ <link type="text/html" rel="alternate" href="https://github.com/cantino/huginn/commit/4a433806eeace44f1e39f02ac61cefdadf3597e2"/>
+ <title>
+ inline models into migration
+ </title>
+ <updated>2014-07-16T15:25:08-04:00</updated>
+ <media:thumbnail height="30" width="30" url="https://avatars2.githubusercontent.com/u/382862?s=30"/>
+ <author>
+ <name>albertsun</name>
+ <uri>https://github.com/albertsun</uri>
+ </author>
+ <content type="html">
+ &lt;pre style='white-space:pre-wrap;width:81ex'>inline models into migration&lt;/pre>
+ </content>
+ </entry>
+ <entry>
+ <id>tag:github.com,2008:Grit::Commit/6ffa528ab0af7f9f5bb4b68437e7613e74fdb8c4</id>
+ <link type="text/html" rel="alternate" href="https://github.com/cantino/huginn/commit/6ffa528ab0af7f9f5bb4b68437e7613e74fdb8c4"/>
+ <title>
+ Merge pull request #398 from knu/imap_use_uid
+ </title>
+ <updated>2014-07-15T19:47:37-07:00</updated>
+ <media:thumbnail height="30" width="30" url="https://avatars3.githubusercontent.com/u/83835?s=30"/>
+ <author>
+ <name>cantino</name>
+ <uri>https://github.com/cantino</uri>
+ </author>
+ <content type="html">
+ &lt;pre style='white-space:pre-wrap;width:81ex'>Merge pull request #398 from knu/imap_use_uid
+
+Use &quot;last seen UID&quot; in ImapFolderAgent&lt;/pre>
+ </content>
+ </entry>
+ <entry>
+ <id>tag:github.com,2008:Grit::Commit/c7e29492c98652cc9738c374d02dcbb7c9bdeac6</id>
+ <link type="text/html" rel="alternate" href="https://github.com/cantino/huginn/commit/c7e29492c98652cc9738c374d02dcbb7c9bdeac6"/>
+ <title>
+ Merge pull request #391 from theofpa/master
+ </title>
+ <updated>2014-07-12T15:19:56-07:00</updated>
+ <media:thumbnail height="30" width="30" url="https://avatars3.githubusercontent.com/u/83835?s=30"/>
+ <author>
+ <name>cantino</name>
+ <uri>https://github.com/cantino</uri>
+ </author>
+ <content type="html">
+ &lt;pre style='white-space:pre-wrap;width:81ex'>Merge pull request #391 from theofpa/master
+
+Ignore xmlns when evaluating xpath&lt;/pre>
+ </content>
+ </entry>
+ <entry>
+ <id>tag:github.com,2008:Grit::Commit/f3552ece2e9af187bd5e613783dd27810b63c32f</id>
+ <link type="text/html" rel="alternate" href="https://github.com/cantino/huginn/commit/f3552ece2e9af187bd5e613783dd27810b63c32f"/>
+ <title>
+ ImapFolderAgent: Emit a log message when creating an event or skipping it.
+ </title>
+ <updated>2014-07-11T19:19:12+09:00</updated>
+ <media:thumbnail height="30" width="30" url="https://avatars2.githubusercontent.com/u/10236?s=30"/>
+ <author>
+ <name>knu</name>
+ <uri>https://github.com/knu</uri>
+ </author>
+ <content type="html">
+ &lt;pre style='white-space:pre-wrap;width:81ex'>ImapFolderAgent: Emit a log message when creating an event or skipping it.&lt;/pre>
+ </content>
+ </entry>
+ <entry>
+ <id>tag:github.com,2008:Grit::Commit/d144d3797d2db362943357c6d85238ec657cfa06</id>
+ <link type="text/html" rel="alternate" href="https://github.com/cantino/huginn/commit/d144d3797d2db362943357c6d85238ec657cfa06"/>
+ <title>
+ ImapFolderAgent: Enable notification of mails already marked as read.
+ </title>
+ <updated>2014-07-11T19:08:55+09:00</updated>
+ <media:thumbnail height="30" width="30" url="https://avatars2.githubusercontent.com/u/10236?s=30"/>
+ <author>
+ <name>knu</name>
+ <uri>https://github.com/knu</uri>
+ </author>
+ <content type="html">
+ &lt;pre style='white-space:pre-wrap;width:81ex'>ImapFolderAgent: Enable notification of mails already marked as read.
+
+Add a condition key &quot;is_unread&quot; to allow user to select mails based on
+the read status.&lt;/pre>
+ </content>
+ </entry>
+ <entry>
+ <id>tag:github.com,2008:Grit::Commit/d1196a35ada22418bf0cf8b0d5947c2164e983e6</id>
+ <link type="text/html" rel="alternate" href="https://github.com/cantino/huginn/commit/d1196a35ada22418bf0cf8b0d5947c2164e983e6"/>
+ <title>
+ ImapFolderAgent: &quot;conditions&quot; must not actually be nil.
+ </title>
+ <updated>2014-07-11T18:02:09+09:00</updated>
+ <media:thumbnail height="30" width="30" url="https://avatars2.githubusercontent.com/u/10236?s=30"/>
+ <author>
+ <name>knu</name>
+ <uri>https://github.com/knu</uri>
+ </author>
+ <content type="html">
+ &lt;pre style='white-space:pre-wrap;width:81ex'>ImapFolderAgent: &quot;conditions&quot; must not actually be nil.&lt;/pre>
+ </content>
+ </entry>
+ <entry>
+ <id>tag:github.com,2008:Grit::Commit/280c09415ea8114d8a128cd7c2583ae0e0aa480d</id>
+ <link type="text/html" rel="alternate" href="https://github.com/cantino/huginn/commit/280c09415ea8114d8a128cd7c2583ae0e0aa480d"/>
+ <title>
+ ImapFolderAgent: Do not fail when port is blank.
+ </title>
+ <updated>2014-07-11T18:02:09+09:00</updated>
+ <media:thumbnail height="30" width="30" url="https://avatars2.githubusercontent.com/u/10236?s=30"/>
+ <author>
+ <name>knu</name>
+ <uri>https://github.com/knu</uri>
+ </author>
+ <content type="html">
+ &lt;pre style='white-space:pre-wrap;width:81ex'>ImapFolderAgent: Do not fail when port is blank.&lt;/pre>
+ </content>
+ </entry>
+ <entry>
+ <id>tag:github.com,2008:Grit::Commit/045fb957b2370d80190fa8dc036863076d8806fb</id>
+ <link type="text/html" rel="alternate" href="https://github.com/cantino/huginn/commit/045fb957b2370d80190fa8dc036863076d8806fb"/>
+ <title>
+ ImapFolderAgent now recognizes &quot;true&quot;/&quot;false&quot; as boolean option values.
+ </title>
+ <updated>2014-07-11T18:02:09+09:00</updated>
+ <media:thumbnail height="30" width="30" url="https://avatars2.githubusercontent.com/u/10236?s=30"/>
+ <author>
+ <name>knu</name>
+ <uri>https://github.com/knu</uri>
+ </author>
+ <content type="html">
+ &lt;pre style='white-space:pre-wrap;width:81ex'>ImapFolderAgent now recognizes &quot;true&quot;/&quot;false&quot; as boolean option values.
+
+Add a utility method Agent#boolify to make it easier to handle boolean
+option values.&lt;/pre>
+ </content>
+ </entry>
+ <entry>
+ <id>tag:github.com,2008:Grit::Commit/c1b9caa8ccb0c8b8f6103fc80b90fba57a822435</id>
+ <link type="text/html" rel="alternate" href="https://github.com/cantino/huginn/commit/c1b9caa8ccb0c8b8f6103fc80b90fba57a822435"/>
+ <title>
+ ImapFolderAgent: Unstringify integer keys of a hash saved in JSON.
+ </title>
+ <updated>2014-07-11T18:01:26+09:00</updated>
+ <media:thumbnail height="30" width="30" url="https://avatars2.githubusercontent.com/u/10236?s=30"/>
+ <author>
+ <name>knu</name>
+ <uri>https://github.com/knu</uri>
+ </author>
+ <content type="html">
+ &lt;pre style='white-space:pre-wrap;width:81ex'>ImapFolderAgent: Unstringify integer keys of a hash saved in JSON.&lt;/pre>
+ </content>
+ </entry>
+ <entry>
+ <id>tag:github.com,2008:Grit::Commit/6a06a32447721abc4477979610e36db0650e2f92</id>
+ <link type="text/html" rel="alternate" href="https://github.com/cantino/huginn/commit/6a06a32447721abc4477979610e36db0650e2f92"/>
+ <title>
+ ImapFolderAgent: Only keep a single UID value for each folder in memory.
+ </title>
+ <updated>2014-07-11T18:01:26+09:00</updated>
+ <media:thumbnail height="30" width="30" url="https://avatars2.githubusercontent.com/u/10236?s=30"/>
+ <author>
+ <name>knu</name>
+ <uri>https://github.com/knu</uri>
+ </author>
+ <content type="html">
+ &lt;pre style='white-space:pre-wrap;width:81ex'>ImapFolderAgent: Only keep a single UID value for each folder in memory.
+
+Previously it used to keep a list of the UIDs of unread mails. Now we
+start to assume that UIDs in a folder identified by a UID VALIDITY value
+are strictly ascending (monotonically increasing) as suggested by RFC
+3501 and 4549 and just keep the highest UID seen in the last run.
+
+This enhancement will help reduce the size of memory typically where
+mails are left unread forever.&lt;/pre>
+ </content>
+ </entry>
+ <entry>
+ <id>tag:github.com,2008:Grit::Commit/9ed63e45b247c30a02e8e59b4d24fccbe8644876</id>
+ <link type="text/html" rel="alternate" href="https://github.com/cantino/huginn/commit/9ed63e45b247c30a02e8e59b4d24fccbe8644876"/>
+ <title>
+ Merge pull request #397 from cantino/update_rails_and_gems
+ </title>
+ <updated>2014-07-05T16:34:29-07:00</updated>
+ <media:thumbnail height="30" width="30" url="https://avatars3.githubusercontent.com/u/83835?s=30"/>
+ <author>
+ <name>cantino</name>
+ <uri>https://github.com/cantino</uri>
+ </author>
+ <content type="html">
+ &lt;pre style='white-space:pre-wrap;width:81ex'>Merge pull request #397 from cantino/update_rails_and_gems
+
+upgrade rails and gems&lt;/pre>
+ </content>
+ </entry>
+ <entry>
+ <id>tag:github.com,2008:Grit::Commit/87a7abda23a82305d7050ac0bb400ce36c863d01</id>
+ <link type="text/html" rel="alternate" href="https://github.com/cantino/huginn/commit/87a7abda23a82305d7050ac0bb400ce36c863d01"/>
+ <title>
+ upgrade rails and gems
+ </title>
+ <updated>2014-07-05T08:01:36-07:00</updated>
+ <media:thumbnail height="30" width="30" url="https://avatars3.githubusercontent.com/u/83835?s=30"/>
+ <author>
+ <name>cantino</name>
+ <uri>https://github.com/cantino</uri>
+ </author>
+ <content type="html">
+ &lt;pre style='white-space:pre-wrap;width:81ex'>upgrade rails and gems&lt;/pre>
+ </content>
+ </entry>
+ <entry>
+ <id>tag:github.com,2008:Grit::Commit/ea7594fa976fe24bb7024b6e3e0d2881dd86033a</id>
+ <link type="text/html" rel="alternate" href="https://github.com/cantino/huginn/commit/ea7594fa976fe24bb7024b6e3e0d2881dd86033a"/>
+ <title>
+ Merge pull request #396 from knu/show_propagate_immediately
+ </title>
+ <updated>2014-07-03T20:50:40-07:00</updated>
+ <media:thumbnail height="30" width="30" url="https://avatars3.githubusercontent.com/u/83835?s=30"/>
+ <author>
+ <name>cantino</name>
+ <uri>https://github.com/cantino</uri>
+ </author>
+ <content type="html">
+ &lt;pre style='white-space:pre-wrap;width:81ex'>Merge pull request #396 from knu/show_propagate_immediately
+
+Make propagate_immediately more visible in agent details and the diagram.&lt;/pre>
+ </content>
+ </entry>
+ <entry>
+ <id>tag:github.com,2008:Grit::Commit/0e80f5341587aace2c023b06eb9265b776ac4535</id>
+ <link type="text/html" rel="alternate" href="https://github.com/cantino/huginn/commit/0e80f5341587aace2c023b06eb9265b776ac4535"/>
+ <title>
+ Dashed line in a diagram indicates propagate_immediately being false.
+ </title>
+ <updated>2014-07-04T03:42:52+09:00</updated>
+ <media:thumbnail height="30" width="30" url="https://avatars2.githubusercontent.com/u/10236?s=30"/>
+ <author>
+ <name>knu</name>
+ <uri>https://github.com/knu</uri>
+ </author>
+ <content type="html">
+ &lt;pre style='white-space:pre-wrap;width:81ex'>Dashed line in a diagram indicates propagate_immediately being false.&lt;/pre>
+ </content>
+ </entry>
+ <entry>
+ <id>tag:github.com,2008:Grit::Commit/cf9cdfb3ac9d47b7fdf5d7669577c964bee9a186</id>
+ <link type="text/html" rel="alternate" href="https://github.com/cantino/huginn/commit/cf9cdfb3ac9d47b7fdf5d7669577c964bee9a186"/>
+ <title>
+ Show the propagate_immediately flag in agent details.
+ </title>
+ <updated>2014-07-04T02:53:31+09:00</updated>
+ <media:thumbnail height="30" width="30" url="https://avatars2.githubusercontent.com/u/10236?s=30"/>
+ <author>
+ <name>knu</name>
+ <uri>https://github.com/knu</uri>
+ </author>
+ <content type="html">
+ &lt;pre style='white-space:pre-wrap;width:81ex'>Show the propagate_immediately flag in agent details.&lt;/pre>
+ </content>
+ </entry>
+ <entry>
+ <id>tag:github.com,2008:Grit::Commit/b1128335b8de98afc5cad1b2ca5573e3bab1da1d</id>
+ <link type="text/html" rel="alternate" href="https://github.com/cantino/huginn/commit/b1128335b8de98afc5cad1b2ca5573e3bab1da1d"/>
+ <title>
+ Merge pull request #389 from dsander/silence_worker_status
+ </title>
+ <updated>2014-07-01T21:47:40-07:00</updated>
+ <media:thumbnail height="30" width="30" url="https://avatars3.githubusercontent.com/u/83835?s=30"/>
+ <author>
+ <name>cantino</name>
+ <uri>https://github.com/cantino</uri>
+ </author>
+ <content type="html">
+ &lt;pre style='white-space:pre-wrap;width:81ex'>Merge pull request #389 from dsander/silence_worker_status
+
+Supress logging for requests to the /worker_status&lt;/pre>
+ </content>
+ </entry>
+ <entry>
+ <id>tag:github.com,2008:Grit::Commit/d25e670b1c040f78eb648120c117853421d522c3</id>
+ <link type="text/html" rel="alternate" href="https://github.com/cantino/huginn/commit/d25e670b1c040f78eb648120c117853421d522c3"/>
+ <title>
+ Merge pull request #393 from CloCkWeRX/google_calendar
+ </title>
+ <updated>2014-07-01T21:47:16-07:00</updated>
+ <media:thumbnail height="30" width="30" url="https://avatars3.githubusercontent.com/u/83835?s=30"/>
+ <author>
+ <name>cantino</name>
+ <uri>https://github.com/cantino</uri>
+ </author>
+ <content type="html">
+ &lt;pre style='white-space:pre-wrap;width:81ex'>Merge pull request #393 from CloCkWeRX/google_calendar
+
+Add Google calendar publish agent&lt;/pre>
+ </content>
+ </entry>
+ <entry>
+ <id>tag:github.com,2008:Grit::Commit/d7b0e35aaaafec3032d3fe271b426f1e9d3727b4</id>
+ <link type="text/html" rel="alternate" href="https://github.com/cantino/huginn/commit/d7b0e35aaaafec3032d3fe271b426f1e9d3727b4"/>
+ <title>
+ switch to cantino-twitter-stream
+ </title>
+ <updated>2014-07-01T21:36:38-07:00</updated>
+ <media:thumbnail height="30" width="30" url="https://avatars3.githubusercontent.com/u/83835?s=30"/>
+ <author>
+ <name>cantino</name>
+ <uri>https://github.com/cantino</uri>
+ </author>
+ <content type="html">
+ &lt;pre style='white-space:pre-wrap;width:81ex'>switch to cantino-twitter-stream&lt;/pre>
+ </content>
+ </entry>
+ <entry>
+ <id>tag:github.com,2008:Grit::Commit/d465158f77dcd9078697e6167b50abbfdfa8b1af</id>
+ <link type="text/html" rel="alternate" href="https://github.com/cantino/huginn/commit/d465158f77dcd9078697e6167b50abbfdfa8b1af"/>
+ <title>
+ Shift to dev group
+ </title>
+ <updated>2014-07-01T16:37:47+09:30</updated>
+ <media:thumbnail height="30" width="30" url="https://avatars3.githubusercontent.com/u/365751?s=30"/>
+ <author>
+ <name>CloCkWeRX</name>
+ <uri>https://github.com/CloCkWeRX</uri>
+ </author>
+ <content type="html">
+ &lt;pre style='white-space:pre-wrap;width:81ex'>Shift to dev group&lt;/pre>
+ </content>
+ </entry>
+</feed>
View
81 spec/models/agents/rss_agent_spec.rb
@@ -0,0 +1,81 @@
+require 'spec_helper'
+
+describe Agents::RssAgent do
+ before do
+ @valid_options = {
+ 'expected_update_period_in_days' => "2",
+ 'url' => "https://github.com/cantino/huginn/commits/master.atom",
+ }
+
+ stub_request(:any, /github.com/).to_return(:body => File.read(Rails.root.join("spec/data_fixtures/github_rss.atom")), :status => 200)
+ end
+
+ let(:agent) do
+ _agent = Agents::RssAgent.new(:name => "github rss feed", :options => @valid_options)
+ _agent.user = users(:bob)
+ _agent.save!
+ _agent
+ end
+
+ it_behaves_like WebRequestConcern
+
+ describe "validations" do
+ it "should validate the presence of url" do
+ agent.options['url'] = "http://google.com"
+ agent.should be_valid
+
+ agent.options['url'] = ""
+ agent.should_not be_valid
+
+ agent.options['url'] = nil
+ agent.should_not be_valid
+ end
+
+ it "should validate the presence and numericality of expected_update_period_in_days" do
+ agent.options['expected_update_period_in_days'] = "5"
+ agent.should be_valid
+
+ agent.options['expected_update_period_in_days'] = "wut?"
+ agent.should_not be_valid
+
+ agent.options['expected_update_period_in_days'] = 0
+ agent.should_not be_valid
+
+ agent.options['expected_update_period_in_days'] = nil
+ agent.should_not be_valid
+
+ agent.options['expected_update_period_in_days'] = ""
+ agent.should_not be_valid
+ end
+ end
+
+ describe "emitting RSS events" do
+ it "should emit items as events" do
+ lambda {
+ agent.check
+ }.should change { agent.events.count }.by(20)
+ end
+
+ it "should track ids and not re-emit the same item when seen again" do
+ agent.check
+ agent.memory['seen_ids'].should == agent.events.map {|e| e.payload['id'] }
+
+ newest_id = agent.memory['seen_ids'][0]
+ agent.events.first.payload['id'].should == newest_id
+ agent.memory['seen_ids'] = agent.memory['seen_ids'][1..-1] # forget the newest id
+
+ lambda {
+ agent.check
+ }.should change { agent.events.count }.by(1)
+
+ agent.events.first.payload['id'].should == newest_id
+ agent.memory['seen_ids'][0].should == newest_id
+ end
+
+ it "should truncate the seen_ids in memory at 500 items" do
+ agent.memory['seen_ids'] = ['x'] * 490
+ agent.check
+ agent.memory['seen_ids'].length.should == 500
+ end
+ end
+end
View
90 spec/models/agents/website_agent_spec.rb
@@ -4,9 +4,9 @@
describe "checking without basic auth" do
before do
stub_request(:any, /xkcd/).to_return(:body => File.read(Rails.root.join("spec/data_fixtures/xkcd.html")), :status => 200)
- @site = {
+ @valid_options = {
'name' => "XKCD",
- 'expected_update_period_in_days' => 2,
+ 'expected_update_period_in_days' => "2",
'type' => "html",
'url' => "http://xkcd.com",
'mode' => 'on_change',
@@ -16,11 +16,13 @@
'hovertext' => { 'css' => "#comic img", 'attr' => "title" }
}
}
- @checker = Agents::WebsiteAgent.new(:name => "xkcd", :options => @site, :keep_events_for => 2)
+ @checker = Agents::WebsiteAgent.new(:name => "xkcd", :options => @valid_options, :keep_events_for => 2)
@checker.user = users(:bob)
@checker.save!
end
+ it_behaves_like WebRequestConcern
+
describe "validations" do
before do
@checker.should be_valid
@@ -42,20 +44,6 @@
@checker.should be_valid
end
- it "should validate headers" do
- @checker.options['headers'] = "blah"
- @checker.should_not be_valid
-
- @checker.options['headers'] = ""
- @checker.should be_valid
-
- @checker.options['headers'] = {}
- @checker.should be_valid
-
- @checker.options['headers'] = { 'foo' => 'bar' }
- @checker.should be_valid
- end
-
it "should validate mode" do
@checker.options['mode'] = "nonsense"
@checker.should_not be_valid
@@ -97,16 +85,16 @@
it "should always save events when in :all mode" do
lambda {
- @site['mode'] = 'all'
- @checker.options = @site
+ @valid_options['mode'] = 'all'
+ @checker.options = @valid_options
@checker.check
@checker.check
}.should change { Event.count }.by(2)
end
it "should take uniqueness_look_back into account during deduplication" do
- @site['mode'] = 'all'
- @checker.options = @site
+ @valid_options['mode'] = 'all'
+ @checker.options = @valid_options
@checker.check
@checker.check
event = Event.last
@@ -114,47 +102,47 @@
event.save
lambda {
- @site['mode'] = 'on_change'
- @site['uniqueness_look_back'] = 2
- @checker.options = @site
+ @valid_options['mode'] = 'on_change'
+ @valid_options['uniqueness_look_back'] = 2
+ @checker.options = @valid_options
@checker.check
}.should_not change { Event.count }
lambda {
- @site['mode'] = 'on_change'
- @site['uniqueness_look_back'] = 1
- @checker.options = @site
+ @valid_options['mode'] = 'on_change'
+ @valid_options['uniqueness_look_back'] = 1
+ @checker.options = @valid_options
@checker.check
}.should change { Event.count }.by(1)
end
it "should log an error if the number of results for a set of extraction patterns differs" do
- @site['extract']['url']['css'] = "div"
- @checker.options = @site
+ @valid_options['extract']['url']['css'] = "div"
+ @checker.options = @valid_options
@checker.check
@checker.logs.first.message.should =~ /Got an uneven number of matches/
end
it "should accept an array for url" do
- @site['url'] = ["http://xkcd.com/1/", "http://xkcd.com/2/"]
- @checker.options = @site
+ @valid_options['url'] = ["http://xkcd.com/1/", "http://xkcd.com/2/"]
+ @checker.options = @valid_options
lambda { @checker.save! }.should_not raise_error;
lambda { @checker.check }.should_not raise_error;
end
it "should parse events from all urls in array" do
lambda {
- @site['url'] = ["http://xkcd.com/", "http://xkcd.com/"]
- @site['mode'] = 'all'
- @checker.options = @site
+ @valid_options['url'] = ["http://xkcd.com/", "http://xkcd.com/"]
+ @valid_options['mode'] = 'all'
+ @checker.options = @valid_options
@checker.check
}.should change { Event.count }.by(2)
end
it "should follow unique rules when parsing array of urls" do
lambda {
- @site['url'] = ["http://xkcd.com/", "http://xkcd.com/"]
- @checker.options = @site
+ @valid_options['url'] = ["http://xkcd.com/", "http://xkcd.com/"]
+ @checker.options = @valid_options
@checker.check
}.should change { Event.count }.by(1)
end
@@ -170,7 +158,7 @@
}, :status => 200)
site = {
'name' => "Some JSON Response",
- 'expected_update_period_in_days' => 2,
+ 'expected_update_period_in_days' => "2",
'type' => "json",
'url' => "http://no-encoding.example.com",
'mode' => 'on_change',
@@ -197,7 +185,7 @@
}, :status => 200)
site = {
'name' => "Some JSON Response",
- 'expected_update_period_in_days' => 2,
+ 'expected_update_period_in_days' => "2",
'type' => "json",
'url' => "http://wrong-encoding.example.com",
'mode' => 'on_change',
@@ -248,11 +236,11 @@
end
it "parses XPath" do
- @site['extract'].each { |key, value|
+ @valid_options['extract'].each { |key, value|
value.delete('css')
value['xpath'] = "//*[@id='comic']//img"
}
- @checker.options = @site
+ @checker.options = @valid_options
@checker.check
event = Event.last
event.payload['url'].should == "http://imgs.xkcd.com/comics/evolving.png"
@@ -263,7 +251,7 @@
it "should turn relative urls to absolute" do
rel_site = {
'name' => "XKCD",
- 'expected_update_period_in_days' => 2,
+ 'expected_update_period_in_days' => "2",
'type' => "html",
'url' => "http://xkcd.com",
'mode' => "on_change",
@@ -291,7 +279,7 @@
stub_request(:any, /json-site/).to_return(:body => json.to_json, :status => 200)
site = {
'name' => "Some JSON Response",
- 'expected_update_period_in_days' => 2,
+ 'expected_update_period_in_days' => "2",
'type' => "json",
'url' => "http://json-site.com",
'mode' => 'on_change',
@@ -322,7 +310,7 @@
stub_request(:any, /json-site/).to_return(:body => json.to_json, :status => 200)
site = {
'name' => "Some JSON Response",
- 'expected_update_period_in_days' => 2,
+ 'expected_update_period_in_days' => "2",
'type' => "json",
'url' => "http://json-site.com",
'mode' => 'on_change',
@@ -358,7 +346,7 @@
stub_request(:any, /json-site/).to_return(:body => json.to_json, :status => 200)
site = {
'name' => "Some JSON Response",
- 'expected_update_period_in_days' => 2,
+ 'expected_update_period_in_days' => "2",
'type' => "json",
'url' => "http://json-site.com",
'mode' => 'on_change'
@@ -382,7 +370,7 @@
@event.payload = { 'url' => "http://xkcd.com" }
lambda {
- @checker.options = @site
+ @checker.options = @valid_options
@checker.receive([@event])
}.should change { Event.count }.by(1)
end
@@ -394,9 +382,9 @@
stub_request(:any, /example/).
with(headers: { 'Authorization' => "Basic #{['user:pass'].pack('m').chomp}" }).
to_return(:body => File.read(Rails.root.join("spec/data_fixtures/xkcd.html")), :status => 200)
- @site = {
+ @valid_options = {
'name' => "XKCD",
- 'expected_update_period_in_days' => 2,
+ 'expected_update_period_in_days' => "2",
'type' => "html",
'url' => "http://www.example.com",
'mode' => 'on_change',
@@ -407,7 +395,7 @@
},
'basic_auth' => "user:pass"
}
- @checker = Agents::WebsiteAgent.new(:name => "auth", :options => @site)
+ @checker = Agents::WebsiteAgent.new(:name => "auth", :options => @valid_options)
@checker.user = users(:bob)
@checker.save!
end
@@ -425,9 +413,9 @@
stub_request(:any, /example/).
with(headers: { 'foo' => 'bar', 'user_agent' => /Faraday/ }).
to_return(:body => File.read(Rails.root.join("spec/data_fixtures/xkcd.html")), :status => 200)
- @site = {
+ @valid_options = {
'name' => "XKCD",
- 'expected_update_period_in_days' => 2,
+ 'expected_update_period_in_days' => "2",
'type' => "html",
'url' => "http://www.example.com",
'mode' => 'on_change',
@@ -436,7 +424,7 @@
'url' => { 'css' => "#comic img", 'attr' => "src" },
}
}
- @checker = Agents::WebsiteAgent.new(:name => "ua", :options => @site)
+ @checker = Agents::WebsiteAgent.new(:name => "ua", :options => @valid_options)
@checker.user = users(:bob)
@checker.save!
end
View
66 spec/support/shared_examples/web_request_concern.rb
@@ -0,0 +1,66 @@
+require 'spec_helper'
+
+shared_examples_for WebRequestConcern do
+ let(:agent) do
+ _agent = described_class.new(:name => "some agent", :options => @valid_options || {})
+ _agent.user = users(:jane)
+ _agent
+ end
+
+ describe "validations" do
+ it "should be valid" do
+ agent.should be_valid
+ end
+
+ it "should validate user_agent" do
+ agent.options['user_agent'] = nil
+ agent.should be_valid
+
+ agent.options['user_agent'] = ""
+ agent.should be_valid
+
+ agent.options['user_agent'] = "foo"
+ agent.should be_valid
+
+ agent.options['user_agent'] = ["foo"]
+ agent.should_not be_valid
+
+ agent.options['user_agent'] = 1
+ agent.should_not be_valid
+ end
+
+ it "should validate headers" do
+ agent.options['headers'] = "blah"
+ agent.should_not be_valid
+
+ agent.options['headers'] = ""
+ agent.should be_valid
+
+ agent.options['headers'] = {}
+ agent.should be_valid
+
+ agent.options['headers'] = { 'foo' => 'bar' }
+ agent.should be_valid
+ end
+
+ it "should validate basic_auth" do
+ agent.options['basic_auth'] = "foo:bar"
+ agent.should be_valid
+
+ agent.options['basic_auth'] = ["foo", "bar"]
+ agent.should be_valid
+
+ agent.options['basic_auth'] = ""
+ agent.should be_valid
+
+ agent.options['basic_auth'] = nil
+ agent.should be_valid
+
+ agent.options['basic_auth'] = "blah"
+ agent.should_not be_valid
+
+ agent.options['basic_auth'] = ["blah"]
+ agent.should_not be_valid
+ end
+ end
+end
Something went wrong with that request. Please try again.