/
disqus_feeder.py
168 lines (131 loc) · 4.13 KB
/
disqus_feeder.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
#!/usr/bin/env python
"""
orbital disqus data feeder
~~~~~~~~~~~~~~~~~~~~~~~~~~
:copyright: (c) 2012 DISQUS.
:license: Apache License 2.0, see LICENSE for more details.
"""
import gevent
import gevent.queue
import gevent.monkey
import disqusapi
import os
import pygeoip
import traceback
from gevent_zeromq import zmq
# Sentry reporting is optional. If the raven package cannot be imported or
# SENTRY_DSN is missing from the environment, ``raven`` is left as None and
# log_exception() only prints the traceback.
try:
    from raven.base import Client
    raven = Client(os.environ['SENTRY_DSN'])
except Exception:
    # Deliberately broad (missing package, missing or rejected DSN), but
    # unlike a bare ``except:`` it lets KeyboardInterrupt and SystemExit
    # propagate.
    raven = None
# Default settings. Override any of them by creating an app.cfg file that
# assigns the same upper-case names as globals.
config = {
    # DISQUS API secret key
    # (listed at https://disqus.com/api/applications/)
    'API_SECRET': '',

    # DISQUS API access token
    # (listed at https://disqus.com/api/applications/)
    'ACCESS_TOKEN': '',

    # Location of the (non-free) GeoIP city data file, which can be
    # purchased from http://www.maxmind.com/app/city
    'GEOIP_PATH': '/usr/share/GeoIP/GeoIPCity.dat',

    # zeromq socket of the Orbital publisher server
    'SERVER': 'tcp://127.0.0.1:5556',
}
def load_settings(filename, config, silent=True):
    """Execute a Python settings file and copy its upper-case names into *config*.

    :param filename: path of the settings file to execute.
    :param config: dict updated in place with every name from the file
                   whose upper-cased form equals itself.
    :param silent: when true, a missing file (or a directory in its place)
                   is not an error and ``False`` is returned instead.
    :returns: ``True`` when the file was loaded, ``False`` when it was
              skipped because *silent* is set.
    :raises IOError: when the file cannot be read and the failure is not
                     silenced; ``strerror`` is prefixed with a hint.
    """
    import errno
    import types

    # A throwaway module object provides the namespace the file runs in.
    mod = types.ModuleType('config')
    mod.__file__ = filename
    try:
        with open(filename) as settings_file:
            source = settings_file.read()
        # NOTE: this runs arbitrary Python code; only load trusted files.
        exec(compile(source, filename, 'exec'), mod.__dict__)
    except IOError as e:
        if silent and e.errno in (errno.ENOENT, errno.EISDIR):
            return False
        e.strerror = 'Unable to load configuration file (%s)' % e.strerror
        raise
    for x in dir(mod):
        if x.upper() == x:
            config[x] = getattr(mod, x)
    # Report success explicitly so callers can tell "loaded" from "skipped".
    return True
def log_exception(e):
    """Report the exception that is currently being handled.

    The error is forwarded to the raven client when one is configured, and
    the traceback is always printed via ``traceback.print_exc()``.

    :param e: the caught exception. It is not used directly; both calls
              take no arguments, so this is meant to be called from inside
              an ``except`` block.
    """
    sentry = raven
    if sentry:
        sentry.captureException()
    traceback.print_exc()
# disqusapi does not provide first-class support for gevent (yet), so the
# standard library is monkey-patched before the API client is used.
gevent.monkey.patch_all()

# Overlay the defaults in ``config`` with the values from app.cfg. The file
# is mandatory here (silent=False), so a missing file raises IOError.
load_settings('app.cfg', config, silent=False)

# Credentials are validated with explicit raises rather than ``assert``
# statements, which are stripped under ``python -O``. AssertionError is kept
# so the failure type and message are unchanged.
if not config['API_SECRET']:
    raise AssertionError("You must set your API_SECRET!")
if not config['ACCESS_TOKEN']:
    raise AssertionError("You must set your ACCESS_TOKEN!")

api = disqusapi.DisqusAPI(secret_key=config['API_SECRET'])
geocoder = pygeoip.GeoIP(config['GEOIP_PATH'], pygeoip.MEMORY_CACHE)
def geocode_addr(addr):
    """Look up the GeoIP record for *addr*.

    :param addr: address handed to ``geocoder.record_by_addr``.
    :returns: whatever the geocoder returns for the address, or an empty
              dict when the lookup raises ``pygeoip.GeoIPError`` (the error
              is logged first).
    """
    try:
        record = geocoder.record_by_addr(addr)
    except pygeoip.GeoIPError as err:
        log_exception(err)
        record = {}
    return record
def anonymize(post):
    """Reduce a post to the thread details that get published.

    :param post: mapping with a ``'thread'`` mapping (``'link'`` and
                 ``'title'``) and a ``'forum'`` value.
    :returns: dict with the thread link, the thread title and the favicon
              URL built from the forum value; nothing else from the post
              is carried over.
    """
    thread = post['thread']
    link = thread['link']
    title = thread['title']
    icon = 'http://disqus.com/api/forums/favicons/%s.jpg' % (post['forum'],)
    return {'link': link, 'title': title, 'icon': icon}
def main():
    """Poll DISQUS for posts and push their locations to the Orbital server.

    Two greenlets are spawned and joined: a poller that fetches posts from
    the DISQUS API and queues those that carry an ``approxLoc`` entry, and
    a feeder that drains the queue onto a zeromq PUSH socket connected to
    ``config['SERVER']``. Both loop forever, so this call does not return
    under normal operation.
    """
    # Bounded buffer between poller and feeder; see handle_post for what
    # happens when it is full.
    queue = gevent.queue.Queue(1000)

    context = zmq.Context()
    pub = context.socket(zmq.PUSH)
    pub.connect(config['SERVER'])

    def handle_post(post):
        """Queue the anonymized post together with its coordinates."""
        # The location is read from 'approxLoc' below, so the guard must
        # test that same key. It previously tested the misspelt
        # 'approxyLoc', which rejected posts that do have a location.
        if 'approxLoc' not in post:
            print('Post %r does not have approxLoc field' % (post['id'],))
            return

        print('New post %s' % (post['id'],))

        data = {
            'post': anonymize(post),
            'lat': post['approxLoc']['lat'],
            'lng': post['approxLoc']['lng'],
        }
        try:
            queue.put_nowait(data)
        except gevent.queue.Full:
            # Best effort: when the feeder falls behind, drop the post
            # instead of blocking the poller.
            return

    def run_poller():
        """Fetch pages of posts forever and hand each post to handle_post."""
        cursor = ''
        # The first request asks for descending order; every later request
        # continues from the returned cursor in ascending order.
        order = 'desc'
        while True:
            print('Fetching new posts (cursor is %r)' % (cursor,))
            try:
                response = api.posts.list(
                    cursor=cursor, forum=":moderated", order=order,
                    related=['thread'], limit=100,
                    access_token=config['ACCESS_TOKEN'])
            except (SystemExit, KeyboardInterrupt):
                raise
            except Exception as e:
                # Log, back off briefly and retry with the same cursor.
                log_exception(e)
                gevent.sleep(.3)
                continue

            cursor = response.cursor['next']
            order = 'asc'
            for post in response:
                try:
                    handle_post(post)
                except (SystemExit, KeyboardInterrupt):
                    raise
                except Exception as e:
                    # One bad post must not abort the rest of the page.
                    log_exception(e)
                    continue
                # Yield so the feeder greenlet can run between posts.
                gevent.sleep(0)

            gevent.sleep(.2)

    def run_feeder():
        """Send queued items to the publisher socket, one at a time."""
        while True:
            data = queue.get()
            pub.send_json(data)
            # Pause after every message, capping the rate at about five
            # messages per second.
            gevent.sleep(0.2)

    procs = [
        gevent.spawn(run_poller),
        gevent.spawn(run_feeder),
    ]
    for proc in procs:
        proc.join()
# Start the poller/feeder loops only when this file is run as a script.
if __name__ == "__main__":
    main()