#!/usr/bin/env python
import webapp2
from google.appengine.api import xmpp
from google.appengine.ext import db
import logging
from md5 import md5
import simplejson as json
import tweepy
import bitly_api
import httplib
import time
import HTMLParser
from bs4 import UnicodeDammit
import re
import twitter_regex
from local_settings import TwitterKey, BitlyKey
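# NOTE (assumption): local_settings.py is not included here. A minimal sketch of
# what it is expected to provide, based only on the keys read below; the
# placeholder values are illustrative, not from the source:
#
#   TwitterKey = {
#       'consumer_key': '...',
#       'consumer_secret': '...',
#       'access_token': '...',
#       'access_token_secret': '...',
#   }
#   BitlyKey = '<bitly generic access token>'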

# setup app datastore for posts/tweets
class TwitterDB(db.Model):
    reddit_id = db.StringProperty()
    created_at = db.DateTimeProperty(auto_now_add=True)
    tweet_status = db.StringProperty(multiline=True)
    is_tweeted = db.BooleanProperty()
    subreddit = db.StringProperty()

# main class
class TwitterBot(webapp2.RequestHandler):
    def get(self):
        # get twitter app keys and authenticate
        consumer_key = TwitterKey['consumer_key']
        consumer_secret = TwitterKey['consumer_secret']
        access_token = TwitterKey['access_token']
        access_token_secret = TwitterKey['access_token_secret']
        auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
        auth.set_access_token(access_token, access_token_secret)
        bot = tweepy.API(auth)
        # authenticate bitly
        shortapi = bitly_api.Connection(access_token=BitlyKey)
        # IMPORTANT: SET UNIQUE USER AGENT
        hdr = {'User-Agent': 'unique user agent by /u/redditusername'}
        # list of [subreddit name, subreddit nickname, score criteria]
        subreddits = [['subreddit', 'subredditnickname', 0], ['subreddit', 'subredditnickname', 0]]
        # build the multi-reddit request url, e.g. /r/sub1+sub2/hot.json?limit=100
        subredditsLength = len(subreddits)
        requestUrl = '/r/'
        requestLcv = 0
        for sub in subreddits:
            requestUrl = requestUrl + sub[0]
            if requestLcv == (subredditsLength - 1):
                requestUrl = requestUrl + '/hot.json?limit=100'
            else:
                requestUrl = requestUrl + '+'
            requestLcv = requestLcv + 1
        # connect to reddit.com and retrieve json
        connError = 0
        jsondata = {}  # default so the checks below are safe if the request fails
        conn = httplib.HTTPConnection('www.reddit.com')
        try:
            conn.request('GET', requestUrl, headers=hdr)
            html = conn.getresponse().read()
            time.sleep(2)
            jsondata = json.loads(str(html))
        except Exception:
            connError = 1
        conn.close()
        # print success or error after connection attempt
        if connError == 1:
            self.response.out.write("<strong><em>Error Connecting!</em></strong><br />")
        else:
            self.response.out.write("<strong><em>Connected!</em></strong><br />")
        tweets = ''
        queued_tweets = ''
        found_tweet = 0
        # if json data retrieved
        if 'data' in jsondata and 'children' in jsondata['data']:
            posts = jsondata['data']['children']
            posts.reverse()
            # loop through json data
            lcv = 1
            for ind, post in enumerate(posts):
                entry = post['data']
                postid = entry['id']
                query = TwitterDB.all()
                score = entry['score']
                query.filter('reddit_id =', postid)
                res = query.get()
                subname = entry['subreddit']
                subnick = ''
                subredditscore = 0
                for sub in subreddits:
                    if sub[0].lower() == subname.lower():
                        subnick = sub[1]
                        subredditscore = sub[2]
                        break
                # if post was not already stored in database and score criteria is met
                if not res and (score >= subredditscore):
                    # get post title
                    title = entry['title']
                    # unescape html entities in the title
                    title = UnicodeDammit(title)
                    title = title.unicode_markup
                    h = HTMLParser.HTMLParser()
                    title = h.unescape(title)
                    # shorten every link in the title that Twitter would otherwise wrap in a t.co
                    matches = re.findall(twitter_regex.REGEXEN['valid_url'], title)
                    matchresults = [match[2] for match in matches if match]
                    urlCount = 0
                    shortMatchResult = ''
                    shortMatchPos = 0
                    for matchresult in matchresults:
                        try:
                            shortMatchResult = shortapi.shorten(matchresult)
                            title = title.replace(matchresult, shortMatchResult['url'])
                            shortMatchPos = title.find(shortMatchResult['url'])
                            urlCount = urlCount + 1
                        except Exception:
                            # drop links that bit.ly refuses to shorten
                            title = title.replace(matchresult, '')
                    # get a shortened link to the reddit comments page
                    try:
                        permalink = shortapi.shorten('http://www.reddit.com' + entry['permalink'])
                    except Exception:
                        permalink = shortapi.shorten('http://www.reddit.com')
                    # get the outbound link if this is a link post
                    url = ''
                    url = entry['url']
                    if url != '':
                        try:
                            url = shortapi.shorten(url)
                        except Exception:
                            url = shortapi.shorten('http://reddit.com')
                    # get self or link data
                    is_self = entry['is_self']
                    # set the status limit (for each link in the title, the limit is reduced by 2
                    # because t.co links are 2 characters longer than bit.ly links)
                    statusLimit = 138
                    if urlCount > 0:
                        statusLimit = statusLimit - (2 * urlCount)
                    # build the status: self posts only get the permalink,
                    # link posts get the outbound url plus the permalink
                    if is_self:
                        status = ' [%s] #%s' % (permalink['url'], subnick)
                        statusExtrasLen = len(status)
                        titleLimit = statusLimit - statusExtrasLen
                        if (shortMatchPos + 22) > titleLimit:
                            title = title.replace(shortMatchResult['url'], '')
                        status = title[:(statusLimit - len(status))] + status
                    else:
                        status = ' %s [%s] #%s' % (url['url'], permalink['url'], subnick)
                        statusExtrasLen = len(status)
                        titleLimit = statusLimit - statusExtrasLen
                        if (shortMatchPos + 22) > titleLimit:
                            title = title.replace(shortMatchResult['url'], '')
                        status = title[:((statusLimit - 2) - len(status))] + status
                    # tweet out the status if one hasn't been tweeted yet this run,
                    # else store the status to be tweeted later
                    item = TwitterDB()
                    item.reddit_id = postid
                    item.subreddit = subname
                    if found_tweet == 0:
                        bot.update_status(status)
                        item.is_tweeted = True
                        item.tweet_status = status
                        item.put()
                        found_tweet = 1
                        tweets += '<p>' + status + '</p>'
                    else:
                        item.is_tweeted = False
                        item.tweet_status = status
                        item.put()
                        queued_tweets += '<p>' + status + '</p>'
                lcv = lcv + 1
        # if no new posts were tweeted, check the database for any untweeted posts and tweet one
        if found_tweet == 0:
            query_old_tweets = TwitterDB.all()
            query_old_tweets.filter("is_tweeted =", False)
            old_tweet = query_old_tweets.get()
            if old_tweet:
                bot.update_status(old_tweet.tweet_status)
                old_tweet.is_tweeted = True
                old_tweet.put()
                tweets += '<p>' + old_tweet.tweet_status + '</p>'
        # print tweets that were tweeted and tweets that were queued
        if tweets:
            self.response.out.write("<h3>Tweeted:</h3>" + tweets)
        if queued_tweets:
            self.response.out.write("<h3>Queued Tweets:</h3>" + queued_tweets)
        self.response.out.write("<strong><em>Done!</em></strong>")


app = webapp2.WSGIApplication([
    ('/bots/twitterbot', TwitterBot)
], debug=True)
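
# NOTE (assumption): this handler only runs when '/bots/twitterbot' is requested.
# On App Engine it would typically be triggered on a schedule by a cron.yaml entry
# such as the sketch below; the schedule value is an illustration, not from the source.
#
#   cron:
#   - description: tweet hot reddit posts
#     url: /bots/twitterbot
#     schedule: every 30 minutes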