Permalink
Browse files

first commit

  • Loading branch information...
0 parents commit f984c0f07b40dfedb3b6f11d739f80d0c4cfade0 @abhinavgupta committed Jan 26, 2012
Showing with 125 additions and 0 deletions.
  1. +23 −0 README
  2. +102 −0 rottentomatoes.py
23 README
@@ -0,0 +1,23 @@
+Rotten Tomatoes
+===============
+
+A Python script that fetches the Rotten Tomatoes ratings for a movie, using
+the BeautifulSoup parser library.
+
+Usage
+-------------
+
+To get the ratings of a movie in the terminal::
+
+ $ python rottentomatoes.py Inception
+
+Run this command from the folder containing the code.
+The setup.py is still under testing and is incomplete; it is intended to
+make the command portable, to help with testing, and to provide a better UI.
+
+
+Credits
+-------
+
+BeautifulSoup parser library
+
@@ -0,0 +1,102 @@
+import sys
+import re
+import urllib
+import urlparse
+
+from BeautifulSoup import BeautifulSoup
+
+
+class MyOpener(urllib.FancyURLopener):
+ """Tricking web servers.
+
+ URL opener that overrides the ``version`` attribute with a desktop
+ Firefox identification string instead of urllib's default one.
+ """
+ # urllib's opener classes use ``version`` as their user-agent string.
+ version = 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.2.15) Gecko/20110303 Firefox/3.6.15'
+
+class RottenTomatoesRating:
+ """Get the rating of a movie."""
+ # title of the movie
+ title = None
+ # RT URL of the movie
+ url = None
+ # RT tomatometer rating of the movie
+ tomatometer = None
+ # RT audience rating of the movie
+ audience = None
+ # Did we find a result?
+ found = False
+
+ # for fetching webpages
+ myopener = MyOpener()
+ # Should we search and take the first hit?
+ search = True
+
+ # constant
+ BASE_URL = 'http://www.rottentomatoes.com'
+ SEARCH_URL = '%s/search/full_search.php?search=' % BASE_URL
+
+ def __init__(self, title, search=True):
+ self.title = title
+ self.search = search
+ self._process()
+
+ def _search_movie(self):
+ """Use RT's own search and return the first hit."""
+ movie_url = ""
+
+ url = self.SEARCH_URL + self.title
+ page = self.myopener.open(url)
+ result = re.search(r'(/m/.*)', page.geturl())
+ if result:
+ # if we are redirected
+ movie_url = result.group(1)
+ else:
+ # if we get a search list
+ soup = BeautifulSoup(page.read())
+ ul = soup.find('ul', {'id' : 'movie_results_ul'})
+ if ul:
+ div = ul.find('div', {'class' : 'media_block_content'})
+ if div:
+ movie_url = div.find('a', href=True)['href']
+
+ return urlparse.urljoin( self.BASE_URL, movie_url )
+
+ def _process(self):
+ """Start the work."""
+
+ # if search option is off, i.e. try to locate the movie directly
+ if not self.search:
+ movie = '_'.join(self.title.split())
+
+ url = "%s/m/%s" % (self.BASE_URL, movie)
+ soup = BeautifulSoup(self.myopener.open(url).read())
+ if soup.find('title').contents[0] == "Page Not Found":
+ url = self._search_movie()
+ else:
+ # if search option is on => use RT's own search
+ url = self._search_movie()
+
+ try:
+ self.url = url
+ soup = BeautifulSoup( self.myopener.open(url).read() )
+ self.title = soup.find('meta', {'property' : 'og:title'})['content']
+ if self.title:
+ self.found = True
+
+ self.tomatometer = soup.find('span', {'id' : 'all-critics-meter'}).contents[0]
+ self.audience = soup.find('span', {'class' : 'meter popcorn numeric '}).contents[0]
+
+ if self.tomatometer.isdigit():
+ self.tomatometer += "%"
+ if self.audience.isdigit():
+ self.audience += "%"
+ except:
+ pass
+
+if __name__ == "__main__":
+ if len(sys.argv) == 1:
+ print "Usage: %s 'Movie title'" % (sys.argv[0])
+ else:
+ rt = RottenTomatoesRating(sys.argv[1])
+ if rt.found:
+ print rt.url
+ print rt.title
+ print rt.tomatometer
+ print rt.audience

0 comments on commit f984c0f

Please sign in to comment.