In [1]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
def get_title_from_index(index):
	return df[df.index == index]["title"].values[0]

def get_index_from_title(title):
	return df[df.title == title]["index"].values[0]

In [3]:
df = pd.read_csv("movie_dataset.csv")
df.head()

Unnamed: 0,index,budget,genres,homepage,id,keywords,original_language,original_title,overview,popularity,...,runtime,spoken_languages,status,tagline,title,vote_average,vote_count,cast,crew,director
0,0,237000000,Action Adventure Fantasy Science Fiction,http://www.avatarmovie.com/,19995,culture clash future space war space colony so...,en,Avatar,"In the 22nd century, a paraplegic Marine is di...",150.437577,...,162.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}, {""iso...",Released,Enter the World of Pandora.,Avatar,7.2,11800,Sam Worthington Zoe Saldana Sigourney Weaver S...,"[{'name': 'Stephen E. Rivkin', 'gender': 0, 'd...",James Cameron
1,1,300000000,Adventure Fantasy Action,http://disney.go.com/disneypictures/pirates/,285,ocean drug abuse exotic island east india trad...,en,Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha...",139.082615,...,169.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"At the end of the world, the adventure begins.",Pirates of the Caribbean: At World's End,6.9,4500,Johnny Depp Orlando Bloom Keira Knightley Stel...,"[{'name': 'Dariusz Wolski', 'gender': 2, 'depa...",Gore Verbinski
2,2,245000000,Action Adventure Crime,http://www.sonypictures.com/movies/spectre/,206647,spy based on novel secret agent sequel mi6,en,Spectre,A cryptic message from Bond’s past sends him o...,107.376788,...,148.0,"[{""iso_639_1"": ""fr"", ""name"": ""Fran\u00e7ais""},...",Released,A Plan No One Escapes,Spectre,6.3,4466,Daniel Craig Christoph Waltz L\u00e9a Seydoux ...,"[{'name': 'Thomas Newman', 'gender': 2, 'depar...",Sam Mendes
3,3,250000000,Action Crime Drama Thriller,http://www.thedarkknightrises.com/,49026,dc comics crime fighter terrorist secret ident...,en,The Dark Knight Rises,Following the death of District Attorney Harve...,112.31295,...,165.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,The Legend Ends,The Dark Knight Rises,7.6,9106,Christian Bale Michael Caine Gary Oldman Anne ...,"[{'name': 'Hans Zimmer', 'gender': 2, 'departm...",Christopher Nolan
4,4,260000000,Action Adventure Science Fiction,http://movies.disney.com/john-carter,49529,based on novel mars medallion space travel pri...,en,John Carter,"John Carter is a war-weary, former military ca...",43.926995,...,132.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"Lost in our world, found in another.",John Carter,6.1,2124,Taylor Kitsch Lynn Collins Samantha Morton Wil...,"[{'name': 'Andrew Stanton', 'gender': 2, 'depa...",Andrew Stanton


In [4]:
features = ['keywords','cast','genres','director']

In [5]:
for feature in features:
    df[feature] = df[feature].fillna('')

In [6]:
def combine_features(row):
    return row['keywords']+" "+row["cast"]+" "+row["genres"]+" "+row['director']

In [7]:
df["combined_features"] = df.apply(combine_features,axis=1)

In [8]:
df.head()

Unnamed: 0,index,budget,genres,homepage,id,keywords,original_language,original_title,overview,popularity,...,spoken_languages,status,tagline,title,vote_average,vote_count,cast,crew,director,combined_features
0,0,237000000,Action Adventure Fantasy Science Fiction,http://www.avatarmovie.com/,19995,culture clash future space war space colony so...,en,Avatar,"In the 22nd century, a paraplegic Marine is di...",150.437577,...,"[{""iso_639_1"": ""en"", ""name"": ""English""}, {""iso...",Released,Enter the World of Pandora.,Avatar,7.2,11800,Sam Worthington Zoe Saldana Sigourney Weaver S...,"[{'name': 'Stephen E. Rivkin', 'gender': 0, 'd...",James Cameron,culture clash future space war space colony so...
1,1,300000000,Adventure Fantasy Action,http://disney.go.com/disneypictures/pirates/,285,ocean drug abuse exotic island east india trad...,en,Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha...",139.082615,...,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"At the end of the world, the adventure begins.",Pirates of the Caribbean: At World's End,6.9,4500,Johnny Depp Orlando Bloom Keira Knightley Stel...,"[{'name': 'Dariusz Wolski', 'gender': 2, 'depa...",Gore Verbinski,ocean drug abuse exotic island east india trad...
2,2,245000000,Action Adventure Crime,http://www.sonypictures.com/movies/spectre/,206647,spy based on novel secret agent sequel mi6,en,Spectre,A cryptic message from Bond’s past sends him o...,107.376788,...,"[{""iso_639_1"": ""fr"", ""name"": ""Fran\u00e7ais""},...",Released,A Plan No One Escapes,Spectre,6.3,4466,Daniel Craig Christoph Waltz L\u00e9a Seydoux ...,"[{'name': 'Thomas Newman', 'gender': 2, 'depar...",Sam Mendes,spy based on novel secret agent sequel mi6 Dan...
3,3,250000000,Action Crime Drama Thriller,http://www.thedarkknightrises.com/,49026,dc comics crime fighter terrorist secret ident...,en,The Dark Knight Rises,Following the death of District Attorney Harve...,112.31295,...,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,The Legend Ends,The Dark Knight Rises,7.6,9106,Christian Bale Michael Caine Gary Oldman Anne ...,"[{'name': 'Hans Zimmer', 'gender': 2, 'departm...",Christopher Nolan,dc comics crime fighter terrorist secret ident...
4,4,260000000,Action Adventure Science Fiction,http://movies.disney.com/john-carter,49529,based on novel mars medallion space travel pri...,en,John Carter,"John Carter is a war-weary, former military ca...",43.926995,...,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"Lost in our world, found in another.",John Carter,6.1,2124,Taylor Kitsch Lynn Collins Samantha Morton Wil...,"[{'name': 'Andrew Stanton', 'gender': 2, 'depa...",Andrew Stanton,based on novel mars medallion space travel pri...


In [9]:
cv = CountVectorizer()

In [10]:
count_matrix = cv.fit_transform(df["combined_features"])

In [11]:
cosine_sim = cosine_similarity(count_matrix)
cosine_sim

array([[1.        , 0.10540926, 0.12038585, ..., 0.        , 0.        ,
        0.        ],
       [0.10540926, 1.        , 0.0761387 , ..., 0.03651484, 0.        ,
        0.        ],
       [0.12038585, 0.0761387 , 1.        , ..., 0.        , 0.11145564,
        0.        ],
       ...,
       [0.        , 0.03651484, 0.        , ..., 1.        , 0.        ,
        0.04264014],
       [0.        , 0.        , 0.11145564, ..., 0.        , 1.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.04264014, 0.        ,
        1.        ]])

In [12]:
movie_user_likes = input("Which Movie do you like?")
movie_index = get_index_from_title(movie_user_likes)
similar_movies = list(enumerate(cosine_sim[movie_index]))
sorted_similar_movies = sorted(similar_movies,key = lambda x:x[1],reverse=True)
print("Here's a list of recommendations for you")
for i in range(1,11):
    print(get_title_from_index(sorted_similar_movies[i][0]))

Which Movie do you like?John Carter
The Host
Heaven is for Real
Beastmaster 2: Through the Portal of Time
The Helix... Loaded
Transformers
The Martian
The Hunger Games: Catching Fire
Ender's Game
Divergent
Damnation Alley


In [13]:
pip install imdbpy

Collecting imdbpy
  Downloading IMDbPY-6.8-py3-none-any.whl (295 kB)
Installing collected packages: imdbpy
Successfully installed imdbpy-6.8
Note: you may need to restart the kernel to use updated packages.


In [15]:
import imdb

In [16]:
moviesDB = imdb.IMDb()

In [17]:
dir(moviesDB)

['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_defModFunct',
 '_getRefs',
 '_get_infoset',
 '_get_keyword',
 '_get_real_characterID',
 '_get_real_companyID',
 '_get_real_movieID',
 '_get_real_personID',
 '_get_search_content',
 '_get_search_movie_advanced_content',
 '_get_top_bottom_movies',
 '_http_logger',
 '_keywordsResults',
 '_mdparse',
 '_normalize_characterID',
 '_normalize_companyID',
 '_normalize_movieID',
 '_normalize_personID',
 '_purge_seasons_data',
 '_reraise_exceptions',
 '_results',
 '_retrieve',
 '_searchIMDb',
 '_search_character',
 '_search_company',
 '_search_episode',
 '_search_keyword',
 '_search_movie',
 '_search_movie_advanced',
 '_search_per

In [25]:
movies = moviesDB.search_movie('Alien: Resurrection')
id = movies[0].getID()
movie = moviesDB.get_movie(id)
title=movie['title']
year=movie['year']
rating=movie['rating']
directors=movie['directors']
casting=movie['cast']

In [40]:
for key,value in movie.items():
    print(key,value)

cast [<Person id:0000244[http] name:_Sigourney Weaver_>, <Person id:0000213[http] name:_Winona Ryder_>, <Person id:0684500[http] name:_Dominique Pinon_>, <Person id:0000579[http] name:_Ron Perlman_>, <Person id:0235427[http] name:_Gary Dourdan_>, <Person id:0000699[http] name:_Michael Wincott_>, <Person id:0283200[http] name:_Kim Flowers_>, <Person id:0000445[http] name:_Dan Hedaya_>, <Person id:0293422[http] name:_J.E. Freeman_>, <Person id:0000374[http] name:_Brad Dourif_>, <Person id:0190441[http] name:_Raymond Cruz_>, <Person id:0650702[http] name:_Leland Orser_>, <Person id:0132274[http] name:_Carolyn Campbell_>, <Person id:0124172[http] name:_Marlene Bush_>, <Person id:0820574[http] name:_David St. James_>, <Person id:0593687[http] name:_Rodney Mitchell_>, <Person id:0266539[http] name:_Robert Faltisco_>, <Person id:0746499[http] name:_David Rowe_>, <Person id:0396656[http] name:_Garrett House_>, <Person id:0198714[http] name:_Rod Damer_>, <Person id:0543805[http] name:_Mark Mans

In [43]:
movie['box office']['Budget']

'$75,000,000 (estimated)'

In [27]:
title

'Alien: Resurrection'

In [28]:
rating

6.2

In [34]:
directors[0]['name']

'Jean-Pierre Jeunet'

In [35]:
for i in range(0,5):
    print(casting[i]['name'])

Sigourney Weaver
Winona Ryder
Dominique Pinon
Ron Perlman
Gary Dourdan
