Skip to content

Commit

Permalink
Fixed bug where worker grabbed only 100 users at a time. Closes gh-7
Browse files Browse the repository at this point in the history
Worker removes some unused user info to reduce transfer/parsing overhead
  • Loading branch information
cloudshao committed Dec 12, 2010
1 parent e509a7c commit 5d0e5a0
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 6 deletions.
55 changes: 51 additions & 4 deletions worker/request.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,54 @@
import urllib2
import simplejson as json

def getFollowersJson(user_id):
    """Return the raw JSON string for user_id's followers.

    Single uncursored request, so this yields only the first page
    (~100 users) of results — see the pagination fix in get_followers.
    Raises urllib2.HTTPError if the request fails.
    """
    return urllib2.urlopen("http://api.twitter.com/1/statuses/followers.json?user_id="+str(user_id)).read()
# Cursored Twitter REST v1 endpoints. Each URL deliberately ends in '?':
# __get_user_list appends the 'user_id=...&cursor=...' query string.
FOLLOWERS_URL = 'http://api.twitter.com/1/statuses/followers.json?'
FOLLOWEES_URL = 'http://api.twitter.com/1/statuses/friends.json?'

def getFolloweesJson(user_id):
    """Return the raw JSON string for user_id's followees (friends).

    Single uncursored request, so this yields only the first page
    (~100 users) of results — see the pagination fix in get_followees.
    Raises urllib2.HTTPError if the request fails.
    """
    return urllib2.urlopen("http://api.twitter.com/1/statuses/friends.json?user_id="+str(user_id)).read()
def get_followers(user_id):
    """Return every follower of a user, across all result pages.

    Raises HTTPError if something went wrong during the HTTP request.

    Keyword arguments:
    user_id -- the id of the user
    """
    followers = __get_user_list(user_id, FOLLOWERS_URL)
    return followers

def get_followees(user_id):
    """Return every followee (friend) of a user, across all result pages.

    Raises HTTPError if something went wrong during the HTTP request.

    Keyword arguments:
    user_id -- the id of the user
    """
    followees = __get_user_list(user_id, FOLLOWEES_URL)
    return followees

def __get_user_list(user_id, url):
    """Fetch the complete, cleaned user list from a cursored Twitter endpoint.

    Follows Twitter's cursor pagination: starts at cursor -1 and keeps
    requesting pages until 'next_cursor' comes back 0 (falsy), so callers
    get the whole list instead of only the first ~100 users.

    Keyword arguments:
    user_id -- the id of the user whose list is fetched
    url -- base endpoint URL ending in '?' (FOLLOWERS_URL or FOLLOWEES_URL)

    Raises HTTPError if any page request fails.
    """
    cursor = -1
    users = []
    while cursor:
        response = urllib2.urlopen(url +
                                   'user_id=' + str(user_id) +
                                   '&cursor=' + str(cursor))
        try:
            # 'page' instead of the old name 'object', which shadowed the builtin.
            page = json.loads(response.read())
        finally:
            # Close each page's connection promptly instead of leaking it
            # until garbage collection (one open handle per page otherwise).
            response.close()
        users.extend(page['users'])
        cursor = page['next_cursor']
    return __clean(users)

def __clean(user_list):
    """Return a copy of user_list with users that only have the fields we need.

    Each user dict is stripped down to id, name, screen_name, location,
    description, protected and status; everything else is dropped to reduce
    transfer/parsing overhead. Missing fields are simply omitted. 'status'
    is itself a dict and is shallow-copied so the result does not alias the
    original nested object. The input list and its dicts are not modified.
    """
    # Whitelist of fields copied by reference; 'status' is handled
    # separately below because it needs a copy of its nested dict.
    kept_fields = ('id', 'name', 'screen_name', 'location',
                   'description', 'protected')
    temp_list = []
    for user in user_list:
        temp = dict((field, user[field])
                    for field in kept_fields if field in user)
        if 'status' in user:
            temp['status'] = user['status'].copy()
        temp_list.append(temp)
    return temp_list
4 changes: 2 additions & 2 deletions worker/worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,8 @@ def crawl(user):
response = {'user':user}
try:
# Get the followers and followees from twitter
followers = json.loads(request.getFollowersJson(int(user)))
followees = json.loads(request.getFolloweesJson(int(user)))
followers = request.get_followers(int(user))
followees = request.get_followees(int(user))
response['followers'] = followers
response['followees'] = followees
except urllib2.HTTPError, e:
Expand Down

0 comments on commit 5d0e5a0

Please sign in to comment.