# NOTE: GitHub file-viewer chrome was accidentally captured with this source;
# it has been commented out so the module parses.
#!/usr/bin/env python
# bot_label_here
import webapp2
from google.appengine.api import xmpp
from google.appengine.ext import db
import logging
from md5 import md5
import tweepy
import bitly_api
import requests
import HTMLParser
from bs4 import UnicodeDammit
import re
import twitter_regex
from local_settings import TwitterKey, BitlyKey
# setup app datastore for posts/tweets
class TwitterDB(db.Model):
    """Datastore record for a reddit post that has been (or is queued to be) tweeted."""
    reddit_id = db.StringProperty()                       # reddit post id (the listing's 'id' field)
    created_at = db.DateTimeProperty(auto_now_add=True)   # set automatically on first put()
    tweet_status = db.StringProperty(multiline=True)      # the full tweet text
    is_tweeted = db.BooleanProperty()                     # False = stored for a later run
    subreddit = db.StringProperty()                       # subreddit the post came from
def build_header(twitter_handle, reddit_username):
    """Return a request-header dict with a unique, descriptive User-Agent.

    Reddit's API guidelines require a unique User-Agent identifying the bot
    and its operator.
    """
    agent = '@%s twitter bot by /u/%s' % (twitter_handle, reddit_username)
    return {'User-Agent': agent}
def build_request_url(sub_dict):
    """Build the multi-reddit 'hot' listing URL for the configured subreddits.

    sub_dict: list of dicts, each carrying at least a 'name' key.
    Returns e.g. 'http://www.reddit.com/r/a+b/hot.json?limit=100'.
    """
    # '+'.join replaces the original manual index-counting loop; output is
    # identical, including the empty-list case (joins to '').
    multi = '+'.join(sub['name'] for sub in sub_dict)
    return 'http://www.reddit.com/r/' + multi + '/hot.json?limit=100'
# function: get json data from reddit based on requestUrl and header
def get_reddit_data(requestUrl, header, self):
    """Fetch and parse the reddit listing JSON.

    Returns the parsed JSON on HTTP 200.  For 4xx/5xx responses,
    raise_for_status() raises requests.HTTPError; for any other non-200
    status, returns False.  In all non-200 cases an error marker is
    written to the response first.
    """
    req = requests.get(requestUrl, headers=header)
    if req.status_code == 200:
        self.response.out.write("Connected: " + str(req.status_code) + "<br />")
        return req.json()
    # BUGFIX: write the error marker BEFORE raise_for_status() -- in the
    # original order the write (and `jsondata = False`) were unreachable
    # for 4xx/5xx because raise_for_status() raised first.
    self.response.out.write("Error:" + "<br />")
    req.raise_for_status()
    return False
# function: prepare post title by making sure it is unicode and has no html entities
def prepare_post_title(title):
    """Normalize a reddit post title to clean unicode text.

    UnicodeDammit sniffs the encoding and exposes the decoded text as
    unicode_markup; HTMLParser.unescape() then converts HTML entities
    (e.g. '&amp;') back to literal characters.
    NOTE(review): Python 2 only -- the HTMLParser module and its
    unescape() method do not exist under these names in Python 3.
    """
    title = UnicodeDammit(title)
    title = title.unicode_markup
    h = HTMLParser.HTMLParser()
    return h.unescape(title)
# function: build twitter status with post data
def build_status(is_self, permalink, subnick, statusLimit, shortMatchPos, shortMatchResult, title, url):
    """Compose the tweet text for a reddit post.

    Self posts become '<title> [<permalink>] #<nick>'; link posts also
    embed the (shortened) target url.  If the shortened in-title link
    would land beyond the title budget (Twitter wraps links at 22 chars),
    it is stripped from the title.  The title is then truncated to fit.
    """
    if is_self:
        suffix = ' [%s] #%s' % (permalink, subnick)
        budget = statusLimit            # self posts use the full budget
    else:
        suffix = ' %s [%s] #%s' % (url, permalink, subnick)
        budget = statusLimit - 2        # link posts reserve 2 extra chars
    # drop the shortened in-title link when it would overflow the title room
    if (shortMatchPos + 22) > (statusLimit - len(suffix)):
        title = title.replace(shortMatchResult, '')
    return title[:(budget - len(suffix))] + suffix
# main class
class TwitterBot(webapp2.RequestHandler):
    """GAE handler: tweet one new high-scoring reddit post per invocation.

    Intended to be hit by cron.  Scans the configured subreddits' hot
    listing, tweets the first qualifying post not yet in the datastore,
    queues the rest, and falls back to a previously queued tweet when
    nothing new qualifies.
    """

    def get(self):
        # get twitter app keys and authenticate
        consumer_key = TwitterKey['consumer_key']
        consumer_secret = TwitterKey['consumer_secret']
        access_token = TwitterKey['access_token']
        access_token_secret = TwitterKey['access_token_secret']
        auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
        auth.set_access_token(access_token, access_token_secret)
        bot = tweepy.API(auth)
        # authenticate bitly
        shortapi = bitly_api.Connection(access_token=BitlyKey)
        # IMPORTANT: SET UNIQUE USER AGENT
        hdr = build_header('TWITTER_HANDLE_HERE', 'REDDIT_USERNAME_HERE')
        # per-subreddit config: name, hashtag nickname, score threshold
        subreddits = [
            {"name": "subreddit1", "nickname": "sub1", "threshold": 100},
            {"name": "subreddit2", "nickname": "sub2", "threshold": 200}
        ]
        # create multi-reddit request url and retrieve the listing JSON
        requestUrl = build_request_url(subreddits)
        jsondata = get_reddit_data(requestUrl, hdr, self)
        tweets = ''
        queued_tweets = ''
        found_tweet = 0
        # BUGFIX: get_reddit_data can return False; `'data' in False` would
        # raise TypeError, so guard on jsondata first.
        if jsondata and 'data' in jsondata and 'children' in jsondata['data']:
            posts = jsondata['data']['children']
            posts.reverse()  # oldest first, so older qualifying posts go out first
            for post in posts:
                entry = post['data']
                postid = entry['id']
                # skip posts already stored in the datastore
                query = TwitterDB.all()
                query.filter('reddit_id =', postid)
                res = query.get()
                if not res:
                    subname = entry['subreddit']
                    score = entry['score']
                    # look up the nickname and score threshold for this subreddit
                    subnick = ''
                    subredditscore = 0
                    for sub in subreddits:
                        if sub['name'].lower() == subname.lower():
                            subnick = sub['nickname']
                            subredditscore = sub['threshold']
                            break
                    # only posts at or above the subreddit's threshold qualify
                    if score >= subredditscore:
                        title = prepare_post_title(entry['title'])
                        # find all links in the title that Twitter will wrap in a t.co
                        matches = re.findall(twitter_regex.REGEXEN['valid_url'], title)
                        matchresults = [match[2] for match in matches if match]
                        # shorten every in-title link via bitly; drop unshortenable ones
                        urlCount = 0
                        shortMatchResult = ''
                        shortMatchPos = 0
                        for matchresult in matchresults:
                            try:
                                shortMatchResult = shortapi.shorten(matchresult)['url']
                                title = title.replace(matchresult, shortMatchResult)
                                shortMatchPos = title.find(shortMatchResult)
                                urlCount = urlCount + 1
                            except Exception:
                                title = title.replace(matchresult, '')
                        # shorten post permalink (link to reddit)
                        try:
                            permalink = shortapi.shorten('http://www.reddit.com' + entry['permalink'])
                        except Exception:
                            permalink = shortapi.shorten('http://www.reddit.com')
                        # shorten the target url for link posts.
                        # BUGFIX: the original tested `if not url:`, so shortening
                        # only ran when the url was EMPTY, and the bitly response
                        # dict (not its 'url' field) was passed to build_status.
                        url = entry['url']
                        if url:
                            try:
                                url = shortapi.shorten(url)['url']
                            except Exception:
                                url = shortapi.shorten('http://reddit.com')['url']
                        # each in-title link shrinks the budget by 2: t.co links
                        # are 2 chars longer than bit.ly links
                        statusLimit = 138
                        if urlCount > 0:
                            statusLimit = statusLimit - (2 * urlCount)
                        # build tweet status
                        status = build_status(entry['is_self'], permalink['url'], subnick,
                                              statusLimit, shortMatchPos, shortMatchResult,
                                              title, url)
                        # tweet the first new post; queue the rest for later runs
                        item = TwitterDB()
                        if found_tweet == 0:
                            bot.update_status(status)
                            item.is_tweeted = True
                            found_tweet = 1
                            tweets += '<p>' + status + '</p>'
                        else:
                            item.is_tweeted = False
                            queued_tweets += '<p>' + status + '</p>'
                        item.reddit_id = postid
                        item.subreddit = subname
                        item.tweet_status = status
                        item.put()
        # nothing new tweeted: fall back to one previously queued tweet
        if found_tweet == 0:
            query_old_tweets = TwitterDB.all()
            query_old_tweets.filter("is_tweeted =", False)
            old_tweet = query_old_tweets.get()
            if old_tweet:
                bot.update_status(old_tweet.tweet_status)
                old_tweet.is_tweeted = True
                old_tweet.put()
                tweets += '<p>' + old_tweet.tweet_status + '</p>'
        # report what was tweeted and what was queued
        if tweets:
            self.response.out.write("<h3>Tweeted:</h3>" + tweets)
        if queued_tweets:
            self.response.out.write("<h3>Queued Tweets:</h3>" + queued_tweets)
        self.response.out.write("<strong><em>Done!</em></strong>")
# WSGI application: route the bot's trigger URL (hit by cron or manually)
# to the handler.
app = webapp2.WSGIApplication([
    ('/bots/twitterbot', TwitterBot)
], debug=True)