import sys
import re
import urllib
import urlparse
from BeautifulSoup import BeautifulSoup
class MyOpener(urllib.FancyURLopener):
    """URL opener that presents itself to web servers as a desktop browser.

    ``FancyURLopener`` sends its ``version`` attribute as the User-Agent
    header, so overriding it replaces urllib's default identification.
    """

    # Well-formed Firefox 3.6.15 (Gecko 1.9.2.15) identification string; the
    # Gecko revision and the closing parenthesis must both be present.
    version = ('Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; '
               'rv:1.9.2.15) Gecko/20110303 Firefox/3.6.15')
class RottenTomatoesRating(object):
    """Look up a movie on Rotten Tomatoes (RT) and expose its ratings.

    The lookup runs as part of construction. Afterwards ``found`` tells
    whether a movie page was located; when it is True, ``url`` and ``title``
    describe the hit, and ``tomatometer`` / ``audience`` hold the ratings
    that could be read from the page (``None`` when a rating is missing).
    """

    # title of the movie
    title = None
    # RT URL of the movie
    url = None
    # RT tomatometer rating of the movie
    tomatometer = None
    # RT audience rating of the movie
    audience = None
    # Did we find a result?
    found = False

    # for fetching webpages (one opener shared by all instances)
    myopener = MyOpener()
    # Should we search and take the first hit?
    search = True

    # constants
    # NOTE(review): BASE_URL is required by SEARCH_URL and by both lookup
    # methods, but its definition was missing; the value below is an
    # assumption based on the "RT" naming -- confirm scheme and host.
    BASE_URL = 'http://www.rottentomatoes.com'
    SEARCH_URL = '%s/search/full_search.php?search=' % BASE_URL

    def __init__(self, title, search=True):
        """Store the query and perform the lookup right away.

        title  -- movie title to look for
        search -- True: use RT's own search and take the first hit;
                  False: try the direct ``/m/<title>`` URL first and fall
                  back to the search only if that page does not exist
        """
        self.title = title
        self.search = search
        # Callers read ``found`` immediately after construction, so the
        # lookup has to happen here.
        self._process()

    def _fetch_soup(self, url):
        """Download ``url`` and return its content parsed by BeautifulSoup."""
        page = self.myopener.open(url)
        try:
            return BeautifulSoup(page.read())
        finally:
            page.close()

    @staticmethod
    def _rating_text(span):
        """Return the rating stored in ``span``, or None if there is none.

        A purely numeric rating gets a trailing percent sign.
        """
        if span is None or not span.contents:
            return None
        rating = span.contents[0]
        if rating.isdigit():
            rating += "%"
        return rating

    def _search_movie(self):
        """Use RT's own search and return the URL of the first hit.

        When the search yields nothing, the returned URL is ``BASE_URL``
        itself (joining the base with an empty path).
        """
        movie_path = ""

        # Encode the title so that spaces, '&', '#', ... cannot corrupt
        # the query string.
        query = urllib.quote_plus(self.title)
        page = self.myopener.open(self.SEARCH_URL + query)
        try:
            redirected = re.search(r'(/m/.*)', page.geturl())
            if redirected:
                # if we are redirected straight to a movie page
                movie_path = redirected.group(1)
            else:
                # if we get a search list: take the first entry's link
                soup = BeautifulSoup(page.read())
                results = soup.find('ul', {'id': 'movie_results_ul'})
                if results is not None:
                    first = results.find(
                        'div', {'class': 'media_block_content'})
                    if first is not None:
                        link = first.find('a', href=True)
                        if link is not None:
                            movie_path = link['href']
        finally:
            page.close()

        return urlparse.urljoin(self.BASE_URL, movie_path)

    def _process(self):
        """Locate the movie page and fill in the result attributes."""
        soup = None

        if self.search:
            # if search option is on => use RT's own search
            url = self._search_movie()
        else:
            # if search option is off, i.e. try to locate the movie directly
            movie = '_'.join(self.title.split())
            url = "%s/m/%s" % (self.BASE_URL, movie)
            soup = self._fetch_soup(url)
            heading = soup.find('title')
            if (heading is not None and heading.contents
                    and heading.contents[0] == "Page Not Found"):
                url = self._search_movie()
                soup = None  # the page fetched above is not the movie page

        if url == self.BASE_URL:
            # The search produced no movie path; the site root is not a
            # movie page, so leave ``found`` False.
            return

        self.url = url
        if soup is None:
            soup = self._fetch_soup(url)

        meta = soup.find('meta', {'property': 'og:title'})
        page_title = meta.get('content') if meta is not None else None
        if not page_title:
            return
        self.title = page_title
        self.found = True

        self.tomatometer = self._rating_text(
            soup.find('span', {'id': 'all-critics-meter'}))
        self.audience = self._rating_text(
            soup.find('span', {'class': 'meter popcorn numeric '}))
if __name__ == "__main__":
    # Command-line use: the movie title is the first argument.
    if len(sys.argv) < 2:
        print("Usage: %s 'Movie title'" % (sys.argv[0]))
        # Stop here: without a title, sys.argv[1] below would raise
        # IndexError.
        sys.exit(1)

    rt = RottenTomatoesRating(sys.argv[1])
    # Nothing is printed when no movie page was found.
    if rt.found:
        print(rt.url)
        print(rt.title)
        print(rt.tomatometer)
        print(rt.audience)
