Skip to content

Commit

Permalink
adding user collection to mongo and klout score as tweets are loaded
Browse files Browse the repository at this point in the history
  • Loading branch information
Hilary Mason committed Aug 14, 2010
1 parent 89c5e46 commit d90becc
Showing 1 changed file with 52 additions and 0 deletions.
52 changes: 52 additions & 0 deletions load_tweets.py
Expand Up @@ -8,14 +8,17 @@
"""

import sys, os
import datetime
import pymongo
import tweepy # Twitter API class: http://github.com/joshthecoder/tweepy
from lib import mongodb
from lib import klout
from classifiers.classify_tweets import *
import settings # local app settings

class loadTweets(object):
DB_NAME = 'tweets'
USER_COLL_NAME = 'users'

def __init__(self, debug=False):
self.debug = debug
Expand All @@ -39,6 +42,7 @@ def fetchTweets(self, since_id=None):

# parse each incoming tweet
ts = []
authors = []
for tweet in tweets:
t = {
'author': tweet.author.screen_name,
Expand All @@ -64,7 +68,36 @@ def fetchTweets(self, since_id=None):
'truncated': tweet.truncated,
'user': tweet.user.screen_name,
}
u = {
'_id': tweet.author.screen_name, # use as mongo primary key
'contributors_enabled': tweet.author.contributors_enabled,
'created_at': tweet.author.created_at,
'description': tweet.author.description,
'favourites_count': tweet.author.favourites_count, # beware the british
'follow_request_sent': tweet.author.follow_request_sent,
'followers_count': tweet.author.followers_count,
'following': tweet.author.following,
'friends_count': tweet.author.friends_count,
'geo_enabled': tweet.author.geo_enabled,
'twitter_user_id': tweet.author.id,
'lang': tweet.author.lang,
'listed_count': tweet.author.listed_count,
'location': tweet.author.location,
'name': tweet.author.name,
'notifications': tweet.author.notifications,
'profile_image_url': tweet.author.profile_image_url,
'protected': tweet.author.protected,
'statuses_count': tweet.author.statuses_count,
'time_zone': tweet.author.time_zone,
'url': tweet.author.url,
'utc_offset': tweet.author.utc_offset,
'verified': tweet.author.verified,
'_updated': datetime.datetime.now(),
}
authors.append(u)
ts.append(t)

self.update_authors(authors)

# insert into db
try:
Expand All @@ -75,6 +108,25 @@ def fetchTweets(self, since_id=None):
if self.debug:
print "added %s tweets to the db" % (len(ts))

def update_authors(self, authors):
    """Store tweet authors in the users collection with their Klout score.

    An author is (re)written when no record exists for its '_id', when the
    stored record has no '_updated' timestamp, or when that timestamp is a
    day or more away from the current time. Authors refreshed more recently
    are skipped, so the Klout API is hit at most once per user per day.

    authors -- list of user dicts. Each must carry an '_id' key (the value
               passed to Klout as the user name) and is stored as given,
               plus a 'klout_score' field added here.

    Errors raised by the Klout call, by a response lacking
    ['users'][0]['kscore'], or by the database are not caught and propagate
    to the caller.
    """
    k = klout.KloutAPI(settings.KLOUT_API_KEY)
    users = self.db[self.USER_COLL_NAME]
    now = datetime.datetime.now()
    max_age = datetime.timedelta(days=1)  # update once per day
    update_count = 0

    for user in authors:
        # '_id' is the primary key, so at most one document can match.
        record = users.find_one({'_id': user['_id']})
        last_updated = record.get('_updated') if record else None
        if last_updated is not None and abs(last_updated - now) < max_age:
            continue  # refreshed within the last day

        response = k.call('klout', users=user['_id'])
        user['klout_score'] = response['users'][0]['kscore']

        # save() upserts on '_id' in a single operation; the previous
        # remove-then-insert pair could lose the user if the insert failed.
        users.save(user)
        update_count += 1

    if self.debug:
        print("updated %s users in the db" % (update_count))



def classify_tweets(self):
classifiers = []
for active_classifier in active_classifiers:
Expand Down

0 comments on commit d90becc

Please sign in to comment.