forked from feedjira/feedjira
/
feed.rb
126 lines (113 loc) · 4.46 KB
/
feed.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
require "time" # Time.parse and Time#httpdate come from the stdlib "time" library

module Feedzirra
  # Raised by Feed.parse when none of the registered feed classes reports
  # that it is able to parse the given content.
  class NoParserAvailable < StandardError; end

  # Class-level entry points for fetching feeds through Curl::Multi /
  # Curl::Easy and handing response bodies to the registered parser classes.
  class Feed
    # User-Agent header sent when the caller does not pass options[:user_agent].
    USER_AGENT = "feedzirra http://github.com/pauldix/feedzirra/tree/master".freeze

    # Parses +xml+ with the first registered feed class that accepts it.
    #
    # @param xml [String] raw feed document
    # @return whatever the selected class's +parse+ returns
    # @raise [NoParserAvailable] when no registered class accepts the content
    def self.parse(xml)
      parser = determine_feed_parser_for_xml(xml)
      raise NoParserAvailable, "no valid parser for content." unless parser

      parser.parse(xml)
    end

    # Returns the first registered feed class whose +able_to_parse?+ is truthy.
    # Only the first 500 characters of +xml+ are shown to the candidates.
    #
    # @param xml [String] raw feed document
    # @return [Class, nil] the matching feed class, or nil when none matches
    def self.determine_feed_parser_for_xml(xml)
      start_of_doc = xml.slice(0, 500)
      feed_classes.find { |klass| klass.able_to_parse?(start_of_doc) }
    end

    # Registers +klass+ as a parser candidate. It is placed at the front of
    # the list, so the most recently added class is consulted first.
    def self.add_feed_class(klass)
      feed_classes.unshift klass
    end

    # The list of registered parser classes, created empty on first use.
    def self.feed_classes
      @feed_classes ||= []
    end

    # Fetches one or more urls and returns the raw response bodies.
    #
    # Accepts a single url or an array of urls. For every url the recorded
    # value is the response body when curl's success callback fires, or the
    # response code when its failure callback fires.
    #
    # @param urls [String, Array<String>]
    # @param options [Hash] :user_agent, :if_modified_since, :if_none_match
    # @return the single recorded value when exactly one response was
    #   recorded, otherwise a Hash keyed by url
    def self.fetch_raw(urls, options = {})
      urls = [*urls]
      multi = Curl::Multi.new
      responses = {}
      urls.each do |url|
        easy = Curl::Easy.new(url) do |curl|
          configure_request(curl, options)
          curl.on_success { |c| responses[url] = c.body_str }
          curl.on_failure { |c| responses[url] = c.response_code }
        end
        multi.add(easy)
      end
      multi.perform
      responses.size == 1 ? responses.values.first : responses
    end

    # Fetches one or more urls and parses each successful response.
    #
    # On success the parsed feed gets its feed_url (unless the parser already
    # set one), etag and last_modified filled in from the response, and
    # options[:on_success] is called with (url, feed). On failure the response
    # code is recorded and options[:on_failure] is called with
    # (url, response_code, header_str, body_str).
    #
    # @param urls [String, Array<String>]
    # @param options [Hash] :user_agent, :if_modified_since, :if_none_match,
    #   :on_success, :on_failure
    # @return the single recorded value when exactly one response was
    #   recorded, otherwise a Hash keyed by url
    def self.fetch_and_parse(urls, options = {})
      urls = [*urls]
      multi = Curl::Multi.new
      responses = {}
      on_success = options[:on_success]
      on_failure = options[:on_failure]
      urls.each do |url|
        easy = Curl::Easy.new(url) do |curl|
          configure_request(curl, options)
          curl.on_success do |c|
            feed = Feed.parse(c.body_str)
            feed.feed_url ||= c.last_effective_url
            feed.etag = etag_from_header(c.header_str)
            feed.last_modified = last_modified_from_header(c.header_str)
            responses[url] = feed
            on_success.call(url, feed) if on_success
          end
          curl.on_failure do |c|
            responses[url] = c.response_code
            on_failure.call(url, c.response_code, c.header_str, c.body_str) if on_failure
          end
        end
        multi.add(easy)
      end
      multi.perform
      responses.size == 1 ? responses.values.first : responses
    end

    # Re-fetches the given feed object(s) and merges the new content into them.
    #
    # Each feed's etag / last_modified (when present) are sent as
    # If-None-Match / If-Modified-Since. On success the freshly parsed feed is
    # passed to feed.update_from_feed and options[:on_success] is called with
    # (feed). On failure the response code is recorded under the feed's url and
    # options[:on_failure] is called with
    # (feed, response_code, header_str, body_str).
    #
    # @param feeds a feed object or an array of feed objects responding to
    #   feed_url, etag, last_modified and update_from_feed
    # @param options [Hash] :user_agent, :on_success, :on_failure
    # @return the single recorded value when exactly one response was
    #   recorded, otherwise an Array of the recorded values
    def self.update(feeds, options = {})
      feeds = [*feeds]
      multi = Curl::Multi.new
      responses = {}
      on_success = options[:on_success]
      on_failure = options[:on_failure]
      feeds.each do |feed|
        easy = Curl::Easy.new(feed.feed_url) do |curl|
          curl.headers["User-Agent"] = options[:user_agent] || USER_AGENT
          # Send the validator as an HTTP-date string, and only when we have one.
          last_modified = feed.last_modified
          curl.headers["If-Modified-Since"] = http_date(last_modified) if last_modified
          curl.headers["If-None-Match"] = feed.etag if feed.etag
          curl.follow_location = true
          curl.on_success do |c|
            updated_feed = Feed.parse(c.body_str)
            updated_feed.feed_url ||= c.last_effective_url
            updated_feed.etag = etag_from_header(c.header_str)
            updated_feed.last_modified = last_modified_from_header(c.header_str)
            feed.update_from_feed(updated_feed)
            responses[feed.feed_url] = feed
            on_success.call(feed) if on_success
          end
          curl.on_failure do |c|
            # Key by the feed's url; there is no `url` local in this method.
            responses[feed.feed_url] = c.response_code
            on_failure.call(feed, c.response_code, c.header_str, c.body_str) if on_failure
          end
        end
        multi.add(easy)
      end
      multi.perform
      responses.size == 1 ? responses.values.first : responses.values
    end

    # Extracts the value of the first ETag header line in +header+.
    # The header name is matched case-insensitively and the line must end
    # with a carriage return.
    #
    # @param header [String, nil] raw response header text
    # @return [String, nil] the header value, or nil when absent
    def self.etag_from_header(header)
      match = /.*ETag:\s(.*)\r/i.match(header.to_s)
      match && match[1]
    end

    # Extracts the first Last-Modified header line in +header+ and parses it
    # with Time.parse. The header name is matched case-insensitively.
    #
    # @param header [String, nil] raw response header text
    # @return [Time, nil] the parsed time, or nil when the header is absent
    def self.last_modified_from_header(header)
      match = /.*Last-Modified:\s(.*)\r/i.match(header.to_s)
      Time.parse(match[1]) if match
    end

    # Applies the request settings shared by fetch_raw and fetch_and_parse:
    # User-Agent, optional conditional-GET headers, and redirect following.
    def self.configure_request(curl, options)
      curl.headers["User-Agent"] = options[:user_agent] || USER_AGENT
      modified_since = options[:if_modified_since]
      curl.headers["If-Modified-Since"] = http_date(modified_since) if modified_since
      none_match = options[:if_none_match]
      curl.headers["If-None-Match"] = none_match if none_match
      curl.follow_location = true
    end
    private_class_method :configure_request

    # Formats +value+ with #httpdate when it supports it (e.g. a Time);
    # any other value is passed through unchanged.
    def self.http_date(value)
      value.respond_to?(:httpdate) ? value.httpdate : value
    end
    private_class_method :http_date
  end
end