Permalink
Browse files

Script to fetch the number of installs for a given Tumblr theme

Requires BeautifulSoup. Tested with Python 2.6.
  • Loading branch information...
1 parent 74fb2d7 commit 39d44446998efa5eb370b90ea54aa79b49114dfb @inky committed Mar 14, 2010
Showing with 206 additions and 0 deletions.
  1. +107 −0 scripts/openanything.py
  2. +99 −0 scripts/theme_installs.py
View
@@ -0,0 +1,107 @@
+'''OpenAnything: a kind and thoughtful library for HTTP web services
+
+This program is part of 'Dive Into Python', a free Python book for
+experienced programmers. Visit http://diveintopython.org/ for the
+latest version.
+'''
+
+__author__ = 'Mark Pilgrim (mark@diveintopython.org)'
+__version__ = '$Revision: 1.6 $'[11:-2]  # slice off the '$Revision: ' prefix and ' $' suffix, leaving '1.6'
+__date__ = '$Date: 2004/04/16 21:16:24 $'
+__copyright__ = 'Copyright (c) 2004 Mark Pilgrim'
+__license__ = 'Python'
+
+import urllib2, urlparse, gzip
+from StringIO import StringIO
+
+USER_AGENT = 'OpenAnything/%s +http://diveintopython.org/http_web_services/' % __version__  # default User-Agent header value for openAnything() and fetch(); embeds __version__
+
class SmartRedirectHandler(urllib2.HTTPRedirectHandler):
    """Redirect handler that records which redirect status was followed.

    The stock handler follows 301/302 redirects and returns the final
    response.  This subclass delegates to it and then stores the
    original redirect status code on that response as ``.status`` so
    callers can tell that (and how) they were redirected.
    """

    def http_error_301(self, req, fp, code, msg, headers):
        """Follow a 301 redirect and tag the resulting response.

        Returns whatever the base handler returns; when that is a
        response object, its ``status`` attribute is set to ``code``.
        """
        result = urllib2.HTTPRedirectHandler.http_error_301(
            self, req, fp, code, msg, headers)
        # The base handler can return None (e.g. the response carried no
        # Location header, or redirect_request() declined the redirect).
        # Pass that through untouched so the opener can fall back to its
        # default error handling instead of crashing on None.status.
        if result is not None:
            result.status = code
        return result

    def http_error_302(self, req, fp, code, msg, headers):
        """Follow a 302 redirect and tag the resulting response.

        Returns whatever the base handler returns; when that is a
        response object, its ``status`` attribute is set to ``code``.
        """
        result = urllib2.HTTPRedirectHandler.http_error_302(
            self, req, fp, code, msg, headers)
        # See http_error_301: None means "not handled", leave it alone.
        if result is not None:
            result.status = code
        return result
+
+class DefaultErrorHandler(urllib2.HTTPDefaultErrorHandler):
+ def http_error_default(self, req, fp, code, msg, headers):
+ result = urllib2.HTTPError(
+ req.get_full_url(), code, msg, headers, fp)
+ result.status = code
+ return result
+
def openAnything(source, etag=None, lastmodified=None, agent=USER_AGENT):
    """URL, filename, or string --> stream

    This function lets you define parsers that take any input source
    (URL, pathname to local or network file, or actual data as a string)
    and deal with it in a uniform manner.  Returned object is guaranteed
    to have all the basic stdio read methods (read, readline, readlines).
    Just .close() the object when you're done with it.

    The source is resolved in this order: an object that already has a
    read() method is returned as-is; '-' means standard input; an
    http:// or https:// URL is opened with urllib2; anything else is
    tried as a filename and, if it cannot be opened, wrapped in a
    StringIO and treated as literal data.

    If the etag argument is supplied, it will be used as the value of an
    If-None-Match request header.

    If the lastmodified argument is supplied, it must be a formatted
    date/time string in GMT (as returned in the Last-Modified header of
    a previous request).  The formatted date/time will be used
    as the value of an If-Modified-Since request header.

    If the agent argument is supplied, it will be used as the value of a
    User-Agent request header.
    """

    # Already a stream: hand it back untouched.
    if hasattr(source, 'read'):
        return source

    if source == '-':
        # Imported locally: this module does not import sys at the top,
        # so referencing sys.stdin directly would raise NameError.
        import sys
        return sys.stdin

    # https is handled the same way as http; previously an https URL fell
    # through and ended up being returned as a literal string stream.
    if urlparse.urlparse(source)[0] in ('http', 'https'):
        # open URL with urllib2
        request = urllib2.Request(source)
        request.add_header('User-Agent', agent)
        if lastmodified:
            request.add_header('If-Modified-Since', lastmodified)
        if etag:
            request.add_header('If-None-Match', etag)
        # Ask for a compressed body; fetch() decompresses it when the
        # response says Content-Encoding: gzip.
        request.add_header('Accept-encoding', 'gzip')
        opener = urllib2.build_opener(SmartRedirectHandler(), DefaultErrorHandler())
        return opener.open(request)

    # try to open with native open function (if source is a filename)
    try:
        return open(source)
    except (IOError, OSError):
        pass

    # treat source as string
    return StringIO(str(source))
+
def fetch(source, etag=None, lastmodified=None, agent=USER_AGENT):
    '''Fetch data and metadata from a URL, file, stream, or string

    All arguments are passed straight through to openAnything().

    Returns a dictionary that always contains:
      'data'   -- everything read from the source (decompressed when the
                  stream's headers report Content-Encoding: gzip)
      'status' -- the stream's .status attribute if it has one, else 200
    and, depending on what the stream provides:
      'etag', 'lastmodified' -- the ETag / Last-Modified header values
                  (None when absent); only set if the stream has .headers
      'url'    -- the stream's .url attribute, when it has one

    The stream is closed before returning, even if reading or
    decompressing raises.
    '''
    result = {}
    f = openAnything(source, etag, lastmodified, agent)
    try:
        result['data'] = f.read()
        if hasattr(f, 'headers'):
            # save ETag, if the server sent one
            result['etag'] = f.headers.get('ETag')
            # save Last-Modified header, if the server sent one
            result['lastmodified'] = f.headers.get('Last-Modified')
            if f.headers.get('content-encoding') == 'gzip':
                # data came back gzip-compressed, decompress it
                result['data'] = gzip.GzipFile(fileobj=StringIO(result['data'])).read()
        if hasattr(f, 'url'):
            result['url'] = f.url
        # Streams without a status (files, strings) count as a plain success.
        result['status'] = 200
        if hasattr(f, 'status'):
            result['status'] = f.status
    finally:
        # Release the file/socket even when read() or gunzip fails.
        f.close()
    return result
+
View
@@ -0,0 +1,99 @@
+#!/usr/bin/env python
+"""
+Tumblr Theme Installs
+
+Fetch the number of installs for a given Tumblr theme.
+
+Copyright (c) 2010 Liam Cooke
+
+Permission is hereby granted, free of charge, to any person
+obtaining a copy of this software and associated documentation
+files (the "Software"), to deal in the Software without
+restriction, including without limitation the rights to use,
+copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the
+Software is furnished to do so, subject to the following
+conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+OTHER DEALINGS IN THE SOFTWARE.
+
+"""
+import gzip
+import optparse
+import re
+import sys
+import urllib, urllib2
+from StringIO import StringIO
+
+from BeautifulSoup import BeautifulSoup
+
+from openanything import fetch
+
class HumanNumber:
    """A number that remembers its human-formatted spelling.

    Built from text such as '1,234': str() gives that text back
    unchanged, while int() gives the value with the commas removed.
    """

    def __init__(self, string):
        self._string = str(string)
        without_commas = string.replace(',', '')
        self._int = int(without_commas)

    def __int__(self):
        return self._int

    def __str__(self):
        return self._string

    def __repr__(self):
        return 'HumanNumber(%r)' % (self._string,)
+
def theme_url(id):
    """Turn a theme id into its tumblr.com theme page URL.

    A value that int() accepts is formatted into the theme URL; a value
    that makes int() raise ValueError (such as a complete URL string) is
    returned unchanged.
    """
    try:
        theme_number = int(id)
    except ValueError:
        return id
    return 'http://www.tumblr.com/theme/%d' % theme_number
+
def theme_installs(id, full_response=False):
    """
    Look up how many times a Tumblr theme has been installed.

    id may be a numeric id (e.g. 1386) or a complete url
    (e.g. 'http://www.tumblr.com/theme/1386').

    The theme page is fetched and the text of its
    <div id="install_count"> element is wrapped in a HumanNumber; the
    count is None when that lookup raises AttributeError.

    Returns just the count by default.  With full_response set, returns
    the whole dictionary from fetch() with its 'data' entry replaced by
    the count.
    """
    response = fetch(theme_url(id))
    page = BeautifulSoup(response['data'])

    try:
        count_text = page.find('div', id='install_count').string
        count = HumanNumber(count_text)
    except AttributeError:
        # The lookup did not yield usable text (presumably find()
        # returned None because the page has no install_count div).
        count = None
    response['data'] = count

    if full_response:
        return response
    return count
+
def main():
    """Command-line entry point: print the install count for one theme.

    Expects a theme id (or full theme URL) as the first positional
    argument.  With --verbose, every other field of the fetch response
    is printed as 'key: value' before the count.  With --int, the count
    is printed as a plain integer instead of its human-formatted text.

    Returns 1 when no theme id was given or when no install count could
    be found; returns None (exit status 0) on success.
    """
    parser = optparse.OptionParser(usage='%prog [options] theme_id')
    parser.add_option('-i', '--int', action='store_true', default=False,
                      help="machine-friendly output (e.g. 1234 instead of 1,234)")
    parser.add_option('-v', '--verbose', action='store_true', default=False,
                      help='verbose output')

    opts, args = parser.parse_args()

    if not args:
        parser.print_help()
        return 1

    # In verbose mode theme_installs() returns the whole response dict.
    result = theme_installs(args[0], opts.verbose)
    if opts.verbose:
        for key, val in result.items():
            if key != 'data':
                print('%s: %s' % (key, val))
        result = result['data']

    # theme_installs() yields None when the page has no usable install
    # count; int(None) would raise TypeError, so report it and fail.
    if result is None:
        sys.stderr.write('error: install count not found\n')
        return 1

    if opts.int:
        print(int(result))
    else:
        print(result)


if __name__ == '__main__':
    sys.exit(main())

0 comments on commit 39d4444

Please sign in to comment.