-
Notifications
You must be signed in to change notification settings - Fork 0
/
imddbb.py
executable file
·83 lines (64 loc) · 1.95 KB
/
imddbb.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
'''
Author : Jay Rambhia
email : jayrambhia777@gmail.com
Git : https://github.com/jayrambhia
gist : https://gist.github.com/jayrambhia
'''
import urllib2
from bs4 import BeautifulSoup
from mechanize import Browser
import re
def getunicode(soup):
    """Return the concatenated text of a parsed-HTML node.

    ``soup`` may be:

    * a text string -- returned after replacing the literal entities
      ``&#39;``, ``&quot;`` and ``&nbsp;`` with ``'``, ``"`` and a space;
    * any object exposing a ``contents`` sequence of child nodes -- the
      children are walked recursively, in order, and their text is joined;
    * ``None`` -- treated as empty, so the result of a lookup that found
      nothing can be passed straight in.

    Returns the collected text, or ``''`` when there is nothing to collect.
    """
    if soup is None:
        return ''

    # type(u'') is ``unicode`` on Python 2 and ``str`` on Python 3, so the
    # same check works on either interpreter.
    if isinstance(soup, type(u'')):
        for entity, char in (('&#39;', "'"), ('&quot;', '"'), ('&nbsp;', ' ')):
            soup = soup.replace(entity, char)
        return soup

    children = soup.contents
    if not children:
        return ''
    # Join once instead of re-allocating the result with ``+`` per child.
    return ''.join([getunicode(child) for child in children])
def main():
    """Prompt for a movie name, open its IMDb page and print a summary.

    Steps:

    1. Read a movie name from standard input.
    2. Open the IMDb search page for it and follow the first link whose
       URL matches ``/title/tt``.
    3. Parse the resulting page and print the title, rating, release
       date, certificate, genres, actors and description.

    Page elements that cannot be found are reported as ``N/A`` (or an
    empty list) rather than aborting the whole lookup. If the search page
    contains no title link a short message is printed and the function
    returns. Network errors raised by ``Browser.open`` are not caught.
    """
    # Local imports: names needed only by this function.
    import urllib
    from mechanize import LinkNotFoundError

    movie = raw_input('Movie Name: ')
    # Collapse runs of whitespace, then percent-encode so characters such
    # as '&' or '#' in the name cannot corrupt the query string.
    movie_search = urllib.quote_plus(' '.join(movie.split()))
    url = 'http://www.imdb.com/find?q=' + movie_search + '&s=all'

    br = Browser()
    # NOTE(review): these proxy URLs look like placeholders
    # (username/password/proxy/port) -- confirm they are meant to be edited
    # before the script is run.
    br.set_proxies({'http': 'http://username:password@proxy:port',
                    'https': 'https://username:password@proxy:port'})
    br.open(url)

    try:
        link = br.find_link(url_regex=re.compile(r'/title/tt.*'))
    except LinkNotFoundError:
        print('No IMDb title found for: ' + movie)
        return
    res = br.follow_link(link)

    soup = BeautifulSoup(res.read())

    title_tag = soup.find('title')
    movie_title = getunicode(title_tag) if title_tag is not None else 'N/A'

    rate = soup.find('span', itemprop='ratingValue')
    rating = getunicode(rate) + '/10.0' if rate is not None else 'N/A'

    actors = [getunicode(tag) for tag in soup.findAll('a', itemprop='actors')]

    meta = soup.find('meta', {'name': 'description'})
    des = meta['content'] if meta is not None else 'N/A'

    # The info bar supplies the certificate (first element carrying a
    # ``title`` attribute) and a run of links: every link but the last is
    # taken as a genre, the last one as the release date.
    r = 'N/A'
    genre = []
    release_date = 'N/A'
    infobar = soup.find('div', {'class': 'infobar'})
    if infobar is not None:
        rated_tag = infobar.find('', {'title': True})
        if rated_tag is not None:
            r = rated_tag['title']
        genrelist = infobar.findAll('a', {'href': True})
        if genrelist:
            genre = [getunicode(tag) for tag in genrelist[:-1]]
            release_date = getunicode(genrelist[-1])

    # Single-argument print(...) calls produce the same text under the
    # Python 2 print statement and the print function.
    print(movie_title + ' ' + rating)
    print('Release Date: ' + release_date)
    print('Rated ' + r)
    print('')
    print('Genre: ' + ', '.join(genre))
    print('\nActors: ' + ', '.join(actors))
    print('\nDescription:')
    print(des)
# Run the lookup only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()