-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit f984c0f
Showing
2 changed files
with
125 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
Rotten Tomatoes | ||
=============== | ||
|
||
A Python script that fetches the Rotten Tomatoes ratings for a movie using | ||
the BeautifulSoup parser library. | ||
|
||
Usage | ||
------------- | ||
|
||
To get the ratings of a movie in the terminal:: | ||
|
||
$ python rottenttomatoes.py Inception | ||
|
||
Run this command from the folder containing the code. | ||
The setup.py script is still under testing and is incomplete; it is meant to | ||
make the command portable, to help with testing, and to provide a better UI. | ||
|
||
|
||
Credits | ||
------- | ||
|
||
BeautifulSoup parser library | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
import sys | ||
import re | ||
import urllib | ||
import urlparse | ||
|
||
from BeautifulSoup import BeautifulSoup | ||
|
||
|
||
class MyOpener(urllib.FancyURLopener): | ||
"""Tricking web servers.""" | ||
version = 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.2.15) Gecko/20110303 Firefox/3.6.15' | ||
|
||
class RottenTomatoesRating:
    """Look up a movie on Rotten Tomatoes and scrape its ratings.

    Creating an instance performs the lookup immediately (network I/O).
    Check ``found`` before relying on the other attributes.

    Args:
        title: Movie title to look up.
        search: If true (default), use the site's search and take the
            first hit.  If false, first try the direct ``/m/<title>`` URL
            (whitespace replaced by underscores) and fall back to the
            search only when that page is not a movie page.

    Attributes:
        title: The requested title, replaced by the page's ``og:title``
            value once a movie page has been parsed.
        url: URL of the movie page, or None when the search had no hit.
        tomatometer: Critics rating text ('%' appended when numeric),
            or None when it could not be extracted.
        audience: Audience rating text ('%' appended when numeric),
            or None when it could not be extracted.
        found: True once a movie page with a non-empty title was parsed.

    Errors raised while running the search or the direct lookup propagate
    to the caller; a failure to fetch the final movie page is treated as
    "not found".
    """
    # title of the movie
    title = None
    # RT URL of the movie
    url = None
    # RT tomatometer rating of the movie
    tomatometer = None
    # RT audience rating of the movie
    audience = None
    # Did we find a result?
    found = False

    # for fetching webpages (one opener shared by all instances)
    myopener = MyOpener()
    # Should we search and take the first hit?
    search = True

    # constant
    BASE_URL = 'http://www.rottentomatoes.com'
    SEARCH_URL = '%s/search/full_search.php?search=' % BASE_URL

    def __init__(self, title, search=True):
        self.title = title
        self.search = search
        self._process()

    def _fetch_soup(self, url):
        """Download ``url`` and return its parsed document."""
        page = self.myopener.open(url)
        try:
            return BeautifulSoup(page.read())
        finally:
            # Always release the connection, even if parsing fails.
            page.close()

    @staticmethod
    def _rating_text(tag):
        """Return the rating text held by ``tag``, or None if unavailable.

        A purely numeric rating gets a trailing '%'.
        """
        if tag is None or not tag.contents:
            return None
        text = tag.contents[0]
        if not isinstance(text, basestring):
            # First child is a nested tag, not rating text.
            return None
        if text.isdigit():
            text += "%"
        return text

    def _search_movie(self):
        """Use RT's own search and return the first hit.

        Returns:
            The absolute URL of the first matching movie page, or None
            when the search produced no usable hit.
        """
        query = self.title
        if not isinstance(query, str):
            # quote_plus needs a byte string for non-ASCII titles.
            query = query.encode('utf-8')

        # Escape the title so characters such as '&', '#' or '+' cannot
        # alter the query string.
        page = self.myopener.open(self.SEARCH_URL + urllib.quote_plus(query))
        try:
            movie_url = ""
            redirected = re.search(r'(/m/.*)', page.geturl())
            if redirected:
                # if we are redirected straight to a movie page
                movie_url = redirected.group(1)
            else:
                # if we get a search list
                soup = BeautifulSoup(page.read())
                ul = soup.find('ul', {'id' : 'movie_results_ul'})
                div = ul and ul.find('div', {'class' : 'media_block_content'})
                link = div and div.find('a', href=True)
                if link:
                    movie_url = link['href']
        finally:
            page.close()

        if not movie_url:
            # Joining an empty path would yield the site's home page,
            # which must not be mistaken for a movie page.
            return None
        return urlparse.urljoin(self.BASE_URL, movie_url)

    def _process(self):
        """Locate the movie page and fill in the result attributes."""
        url = None
        soup = None

        # if search option is off, i.e. try to locate the movie directly
        if not self.search:
            direct_url = "%s/m/%s" % (self.BASE_URL,
                                      '_'.join(self.title.split()))
            direct_soup = self._fetch_soup(direct_url)
            page_title = direct_soup.find('title')
            if (page_title is not None and page_title.contents
                    and page_title.contents[0] != "Page Not Found"):
                # Keep the parsed page so it is not downloaded twice.
                url, soup = direct_url, direct_soup

        if url is None:
            # search option is on, or the direct lookup failed
            url = self._search_movie()
            if url is None:
                return

        self.url = url
        if soup is None:
            try:
                soup = self._fetch_soup(url)
            except Exception:
                # Best-effort lookup: an unreadable page means "not found",
                # but KeyboardInterrupt/SystemExit are no longer swallowed.
                return

        og_title = soup.find('meta', {'property' : 'og:title'})
        if og_title is None:
            return
        page_title = og_title.get('content')
        if page_title is None:
            return
        self.title = page_title
        if self.title:
            self.found = True

        # Each rating is extracted independently so that a missing
        # audience score does not affect the tomatometer value.
        self.tomatometer = self._rating_text(
            soup.find('span', {'id' : 'all-critics-meter'}))
        self.audience = self._rating_text(
            soup.find('span', {'class' : 'meter popcorn numeric '}))
|
||
# Command-line entry point: print the URL, title and ratings of one movie.
if __name__ == "__main__":
    if len(sys.argv) == 1:
        print("Usage: %s 'Movie title'" % (sys.argv[0]))
    else:
        # Join all arguments so an unquoted multi-word title is not
        # truncated to its first word.
        requested = ' '.join(sys.argv[1:])
        rt = RottenTomatoesRating(requested)
        if rt.found:
            print(rt.url)
            print(rt.title)
            print(rt.tomatometer)
            print(rt.audience)
        else:
            # Report the failure instead of exiting silently with status 0.
            sys.stderr.write("No Rotten Tomatoes entry found for '%s'\n"
                             % requested)
            sys.exit(1)