-
Notifications
You must be signed in to change notification settings - Fork 2
/
hnbot.rb
111 lines (94 loc) · 3.32 KB
/
hnbot.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
# HNBot lets you track comments made by people you follow, via mavenn.
#
class HNBot
  BASE_URL = 'http://news.ycombinator.com'
  NEW_LNKS = "#{BASE_URL}/newest"
  NEW_CMTS = "#{BASE_URL}/newcomments"
  ITEM_URL_PREFIX = "#{BASE_URL}/item?id="

  # A REP friendly bot honoring `curl news.ycombinator.com/robots.txt`.
  # Politeness restricts us to /, /newest, /newcomments and /item with a
  # wait_interval between 30sec and 1min.
  #
  # The bot works in three steps:
  #   * fetch_comments - fetch /newcomments, storing Comments and stub Postings
  #   * fetch_postings - fetch full Postings for the stub postings
  #   * post_activity  - post activity to mavenn

  # Summary counters for the crawled data.
  #
  # Returns a Hash with the keys :posts, :avatars and :watched.
  def self.stats
    {
      :posts   => Posting.count,
      :avatars => Avatar.count,
      :watched => Avatar.watched(:count),
    }
  end

  # Fetch newest comments (first page only).
  #
  # Records the start time under the :fetch_comments setting, waits a random
  # amount of time (up to 42 seconds) and crawls NEW_CMTS.
  #
  # Returns whatever CommentList#crawl returns.
  def self.fetch_comments
    started = Time.now
    Setting.setval(:fetch_comments, started)
    STDERR.puts("fetch_comments: begin #{started}")
    sleep 42 * rand # create some variability
    CommentList.new(NEW_CMTS).crawl # only gets one page
  end

  # Fetch posts on which watchlist avatars have commented.
  #
  # The :fetch_postings_underway setting acts as a "run in progress" flag.
  # When it is already set this method returns false immediately and leaves
  # the flag and the timing settings of the running fetch untouched.
  #
  # Otherwise the flag is raised for the whole run (including the initial
  # random wait) and is always lowered again on the way out, even when an
  # unexpected error propagates to the caller.
  #
  # Returns false when a fetch is already underway, otherwise the elapsed
  # time in whole seconds (also stored under :fetch_postings_seconds).
  def self.fetch_postings
    if Setting.getval(:fetch_postings_underway)
      STDERR.puts("fetch_posting: underway")
      return false
    end

    count = 0
    elapsed = nil
    started = Time.now
    # Raise the flag before sleeping so a second invocation cannot slip in
    # during the wait.
    Setting.setval(:fetch_postings_underway, true)
    begin
      Setting.setval(:fetch_postings, started)
      STDERR.puts("fetch_postings: begin: #{started}")
      sleep 10 * rand
      link = Link.new(ITEM_URL_PREFIX)
      Posting.unfetched.each do |posting|
        begin
          next unless posting.valid
          link.item = posting
          link.crawl
          count += 1
        rescue Posting::NoSuchItem, Posting::Dead
          # soldier on
        end
      end
    ensure
      # Computed here so that an aborted run still records a number of
      # seconds rather than the start timestamp.
      elapsed = (Time.now - started).ceil
      Setting.setval(:fetch_postings_seconds, elapsed)
      Setting.setval(:fetch_postings_underway, false)
      STDERR.puts("fetch_postings: did #{count} in #{elapsed} sec")
    end
    elapsed
  end

  # Post latest activity to mavenn via API.
  #
  # For every stream whose :mavenn attribute is not false, the tuples since
  # stream.posted_at are POSTed as JSON to the stream's activity endpoint:
  #   * 200 - posted_at is advanced to the time taken before the query
  #   * 410 - the stream is gone and is destroyed after the loop
  #   * any other status (500s, 422, ...) is ignored
  # Streams without new activity are skipped; after each request the method
  # sleeps for 30 seconds.
  #
  # Errno::ECONNREFUSED is rescued and logged; streams marked for deletion
  # are not destroyed in that case. A summary line is always written.
  def self.post_activity
    STDERR.puts "post_activity: #{Time.now} "
    # bad starts at 0 so the summary below is complete even when the loop
    # is aborted before any stream gets destroyed.
    items = reqs = scx = bad = 0
    mark_for_deletion = []
    Stream.where(:mavenn.ne => false).all.each do |stream|
      # Taken before the query; presumably so that activity created while
      # the request is in flight is not skipped next time.
      this_post = Time.now
      activity = stream.tuples(:since => stream.posted_at)
      scx += 1
      next if activity.blank?

      uri = URI.parse("#{SiteConfig.mavenn}/2010-10-17/streams/#{stream.sid}/activity")
      http = Net::HTTP.new(uri.host, uri.port)
      # Without this an https endpoint would be spoken to in plain HTTP.
      http.use_ssl = true if uri.scheme == 'https'
      req = Net::HTTP::Post.new(uri.request_uri)
      req.content_type = "application/json"
      req.body = {:activity => activity}.to_json
      req.basic_auth(SiteConfig.apid, SiteConfig.token)
      rsp = http.request(req)
      STDERR.puts "#{rsp.code} #{stream.title} #{activity.size} items"
      case rsp.code
      when '200'
        items += activity.size
        reqs += 1
        stream.set(:posted_at => this_post)
      when '410' # gone
        mark_for_deletion << stream
      end
      # Ignore 500s and 422
      sleep 30
    end
    mark_for_deletion.each { |st| st.destroy; bad += 1 }
  rescue Errno::ECONNREFUSED
    STDERR.puts "**** post_activity: Server down?"
  ensure
    STDERR.puts "post_activity: #{items}, #{reqs} reqs, #{scx} mavenn, #{bad} deleted"
  end
end