Skip to content
Find file
Fetching contributors…
Cannot retrieve contributors at this time
180 lines (154 sloc) 6.77 KB
#!/usr/bin/env python
# encoding: utf-8
"""
Created by Hilary Mason on 2010-04-25.
Copyright (c) 2010 Hilary Mason. All rights reserved.
"""
import sys, os
import datetime
import subprocess
import pickle
import pymongo
import tweepy # Twitter API class:
from lib import mongodb
from lib import klout
from classifiers.classify_tweets import *
import settings # local app settings
# Pulls tweets through the Twitter API (tweepy) and works with them in MongoDB.
# NOTE(review): indentation appears to have been stripped from this copy of the
# file; the class body below is shown flush-left.
class loadTweets(object):
# Passed to mongodb.connect() and also used as the key of the tweet collection.
DB_NAME = 'tweets'
# Key of the collection that update_authors() queries and removes records from.
USER_COLL_NAME = 'users'
# Connect to MongoDB and build an OAuth-authenticated tweepy API client.
#
# debug: stored on the instance; other methods print progress counts when it
#        is true.
# NOTE(review): the `except` below has no visible `try:`, and `last_tweet_id`
# is computed but never used in the visible lines -- presumably a `try:` block
# that used it was lost from this copy; confirm against the original.
def __init__(self, debug=False):
self.debug = debug
self.db = mongodb.connect(self.DB_NAME)
# Consumer and access credentials are read from the local settings module.
auth = tweepy.OAuthHandler(settings.CONSUMER_KEY, settings.CONSUMER_SECRET)
auth.set_access_token(settings.ACCESS_KEY, settings.ACCESS_SECRET)
self.api = tweepy.API(auth)
# Largest stored tweet id, or None when the query returns no documents.
last_tweet_id = self.get_last_tweet_id()
except tweepy.error.TweepError: # authorization failure
print "You need to authorize tc to connect to your twitter account. I'm going to open a browser. Once you authorize, I'll ask for your PIN."
# Run the interactive PIN flow and rebuild the client from the new handler.
auth = self.setup_auth()
self.api = tweepy.API(auth)
def get_last_tweet_id(self):
    """Return the largest stored tweet ``id``, or None if the query is empty."""
    # Sort by id descending and cap at one document, so the first hit (if any)
    # is the newest stored tweet.
    cursor = self.db[self.DB_NAME].find(fields={'id': True})
    cursor = cursor.sort('id', direction=pymongo.DESCENDING)
    for newest in cursor.limit(1):
        return newest['id']
    return None
# Request the home timeline (count=500, passing since_id to the API when it is
# truthy), copy fields of each returned tweet into plain dicts, and in debug
# mode print len(ts).
# NOTE(review): this copy of the method is visibly incomplete (missing
# `else:`, unclosed dict literals, missing values, missing `try:`); the notes
# below mark each gap. Do not treat the visible lines as the full logic.
def fetchTweets(self, since_id=None):
if since_id:
tweets = self.api.home_timeline(since_id, count=500)
# NOTE(review): no `else:` is visible here, so as shown this call would
# overwrite the result above -- presumably an `else:` line was lost; confirm.
tweets = self.api.home_timeline(count=500)
# parse each incoming tweet
ts = []
authors = []
for tweet in tweets:
# t: attributes copied from the tweepy status object for this tweet.
t = {
'contributors': tweet.contributors,
'coordinates': tweet.coordinates,
'created_at': tweet.created_at,
# 'destroy': tweet.destroy,
# 'favorite': tweet.favorite,
'favorited': tweet.favorited,
'geo': tweet.geo,
'in_reply_to_screen_name': tweet.in_reply_to_screen_name,
'in_reply_to_status_id': tweet.in_reply_to_status_id,
'in_reply_to_user_id': tweet.in_reply_to_user_id,
# 'parse': tweet.parse,
# 'parse_list': tweet.parse_list,
# 'retweet': dir(tweet.retweet),
# 'retweets': dir(tweet.retweets),
'source': tweet.source,
# 'source_url': tweet.source_url,
'text': tweet.text,
'truncated': tweet.truncated,
'user': tweet.user.screen_name,
# NOTE(review): the closing brace of `t` is not visible in this copy.
# u: a second dict keyed by '_id'; presumably the per-author record later
# passed to update_authors() -- confirm.
# NOTE(review): the values for both keys below, and the closing brace, are
# missing from this copy.
u = {
'_id':, # use as mongo primary key
'favourites_count':, # beware the british
# insert into db
# NOTE(review): the `try:` and the insert statement this handler guards are
# not visible, nor is the handler's body.
except pymongo.errors.InvalidOperation: # no tweets?
if self.debug:
print "added %s tweets to the db" % (len(ts))
# Walk the given author dicts, look each one up in the users collection, and
# count updates; in debug mode print the count.
#
# authors: iterable of dicts, each with an '_id' key (used for the lookup and
#          for the remove call below).
# NOTE(review): two expressions in this copy are truncated and the `try:` for
# the handler is missing; the notes below mark each gap.
def update_authors(self, authors):
k = klout.KloutAPI(settings.KLOUT_API_KEY)
update_count = 0
for user in authors:
# Stored records whose '_id' matches this author.
records = [r for r in self.db[self.USER_COLL_NAME].find(spec={'_id': user['_id']})]
# NOTE(review): the operand after `-` is missing from this copy. The visible
# parts compare an '_updated' difference against datetime.timedelta(1), i.e.
# one day, which matches the trailing comment.
if not records or abs(records[0]['_updated'] - >= datetime.timedelta(1): # update once per day
kwargs = { 'users': user['_id'] }
# NOTE(review): the call target before `'klout'` is missing from this copy;
# presumably a method on `k` -- confirm.
response ='klout', **kwargs)
# The score is read from the first entry of the response's 'users' list.
user['klout_score'] = response['users'][0]['kscore']
# NOTE(review): no matching `try:` is visible for this handler.
except klout.KloutError: # probably a 404
# Appears to drop this author's stored record when the Klout lookup fails.
self.db[self.USER_COLL_NAME].remove({'_id': user['_id']})
update_count += 1
if self.debug:
print "updated %s users in the db" % (update_count)
def classify_tweets(self):
    """Attach a ``topics`` mapping to every stored tweet that lacks one.

    Each name in ``active_classifiers`` (not defined in this file; presumably
    supplied by the star import from ``classifiers.classify_tweets``) is
    looked up in this module's globals and instantiated once. Every tweet
    document without a ``topics`` field is then passed through all of the
    classifiers, and the resulting ``{topic: score}`` dict is written back to
    that document with ``$set``.
    """
    # Collect the classifier instances. The previous code instantiated each
    # classifier but never stored it, so the per-tweet loop below had nothing
    # to run and every tweet was saved with an empty ``topics`` dict.
    classifiers = [globals()[name]() for name in active_classifiers]

    # Only tweets that have never been classified are selected.
    unclassified = self.db[self.DB_NAME].find(
        spec={'topics': {'$exists': False}},
        fields={'text': True, 'user': True},
    )
    for r in unclassified:
        topics = {}
        for c in classifiers:
            (topic, score) = c.classify(r['text'])
            topics[topic] = score
        self.db[self.DB_NAME].update({'_id': r['_id']}, {'$set': {'topics': topics}})
# utility methods
def setup_auth(self):
    """Authorize tc with OAuth using Twitter's PIN-based flow.

    Tries to open the authorization URL in a browser, prompts on stdin for
    the PIN, exchanges it for an access token, and pickles the
    ``(key, secret)`` pair to ``settings_twitter_creds`` in the current
    working directory.

    Returns:
        The ``tweepy.OAuthHandler`` carrying the new access token.

    Raises:
        tweepy.error.TweepError: per tweepy's documentation, when requesting
            the authorization URL or exchanging the PIN fails.
    """
    auth = tweepy.OAuthHandler(settings.CONSUMER_KEY, settings.CONSUMER_SECRET)
    auth_url = auth.get_authorization_url()

    # Pass the URL as an argument vector instead of interpolating it into a
    # shell command, so characters such as '&' in the URL are never
    # interpreted by a shell.
    # NOTE(review): `open` is the macOS launcher; elsewhere the user relies
    # on the URL printed below.
    try:
        subprocess.Popen(["open", auth_url], stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    except OSError:
        # Launching the browser is best-effort; the manual fallback follows.
        pass

    # A single parenthesized argument prints identically under the Python 2
    # print statement and the Python 3 print function.
    print("( if the browser fails to open, please go to: %s )" % auth_url)
    verifier = raw_input("What's your PIN: ").strip()

    # Exchange the PIN for an access token. The previous code read the PIN
    # but never used it, so auth.access_token was never obtained before the
    # dump below.
    auth.get_access_token(verifier)

    # Use a context manager so the credentials file is flushed and closed.
    with open('settings_twitter_creds', 'w') as creds_file:
        pickle.dump((auth.access_token.key, auth.access_token.secret), creds_file)
    return auth
def init_twitter(self, username, password):
    """Return a tweepy API client built on a username/password basic-auth handler."""
    return tweepy.API(tweepy.BasicAuthHandler(username, password))
# Script entry point: constructing the loader with debug output enabled runs
# whatever work its constructor performs.
if __name__ == "__main__":
    l = loadTweets(debug=True)
Something went wrong with that request. Please try again.