In [51]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [52]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer

In [53]:
df = pd.read_csv("/content/drive/MyDrive/clean_data.csv")

In [54]:
df.head(10)

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,budget,genres,homepage,id,keywords,language,original_title,overview,...,tagline,title,vote_average,num_voted_users,title_year,country,director_name,actor_1_name,actor_2_name,actor_3_name
0,0,0,237000000,Action|Adventure|Fantasy|Science Fiction,http://www.avatarmovie.com/,19995,culture clash|future|space colony|society|spac...,English,Avatar,"In the 22nd century, a paraplegic Marine is di...",...,Enter the World of Pandora.,Avatar,7.2,11800,2009.0,United States of America,James Cameron,Zoe Saldana,Sigourney Weaver,Stephen Lang
1,1,1,300000000,Adventure|Fantasy|Action,http://disney.go.com/disneypictures/pirates/,285,ocean|drug abuse|exotic island|east india trad...,English,Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha...",...,"At the end of the world, the adventure begins.",Pirates of the Caribbean: At World's End,6.9,4500,2007.0,United States of America,Gore Verbinski,Orlando Bloom,Keira Knightley,Stellan Skarsgård
2,2,2,245000000,Action|Adventure|Crime,http://www.sonypictures.com/movies/spectre/,206647,spy|based on novel|secret agent|sequel|british...,Français,Spectre,A cryptic message from Bond’s past sends him o...,...,A Plan No One Escapes,Spectre,6.3,4466,2015.0,United Kingdom,Sam Mendes,Christoph Waltz,Léa Seydoux,Ralph Fiennes
3,3,3,250000000,Action|Crime|Drama|Thriller,http://www.thedarkknightrises.com/,49026,dc comics|crime fighter|terrorist|secret ident...,English,The Dark Knight Rises,Following the death of District Attorney Harve...,...,The Legend Ends,The Dark Knight Rises,7.6,9106,2012.0,United States of America,Christopher Nolan,Michael Caine,Gary Oldman,Anne Hathaway
4,4,4,260000000,Action|Adventure|Science Fiction,http://movies.disney.com/john-carter,49529,based on novel|mars|medallion|space travel|pri...,English,John Carter,"John Carter is a war-weary, former military ca...",...,"Lost in our world, found in another.",John Carter,6.1,2124,2012.0,United States of America,Andrew Stanton,Lynn Collins,Samantha Morton,Willem Dafoe
5,5,5,258000000,Fantasy|Action|Adventure,http://www.sonypictures.com/movies/spider-man3/,559,dual identity|amnesia|sandstorm|love of one's ...,English,Spider-Man 3,The seemingly invincible Spider-Man goes up ag...,...,The battle within.,Spider-Man 3,5.9,3576,2007.0,United States of America,Sam Raimi,Kirsten Dunst,James Franco,Thomas Haden Church
6,6,6,260000000,Animation|Family,http://disney.go.com/disneypictures/tangled/,38757,hostage|magic|horse|fairy tale|music|princess|...,English,Tangled,When the kingdom's most wanted-and most charmi...,...,They're taking adventure to new lengths.,Tangled,7.4,3330,2010.0,United States of America,Byron Howard,Mandy Moore,Donna Murphy,Ron Perlman
7,7,7,280000000,Action|Adventure|Science Fiction,http://marvel.com/movies/movie/193/avengers_ag...,99861,marvel comic|sequel|superhero|based on comic b...,English,Avengers: Age of Ultron,When Tony Stark tries to jumpstart a dormant p...,...,A New Age Has Come.,Avengers: Age of Ultron,7.3,6767,2015.0,United States of America,Joss Whedon,Chris Hemsworth,Mark Ruffalo,Chris Evans
8,8,8,250000000,Adventure|Fantasy|Family,http://harrypotter.warnerbros.com/harrypottera...,767,witch|magic|broom|school of witchcraft|wizardr...,English,Harry Potter and the Half-Blood Prince,"As Harry begins his sixth year at Hogwarts, he...",...,Dark Secrets Revealed,Harry Potter and the Half-Blood Prince,7.4,5293,2009.0,United Kingdom,David Yates,Rupert Grint,Emma Watson,Tom Felton
9,9,9,250000000,Action|Adventure|Fantasy,http://www.batmanvsupermandawnofjustice.com/,209112,dc comics|vigilante|superhero|based on comic b...,English,Batman v Superman: Dawn of Justice,Fearing the actions of a god-like Super Hero l...,...,Justice or revenge,Batman v Superman: Dawn of Justice,5.7,7004,2016.0,United States of America,Zack Snyder,Henry Cavill,Gal Gadot,Amy Adams


In [55]:
df['overview'].head(10)

0    In the 22nd century, a paraplegic Marine is di...
1    Captain Barbossa, long believed to be dead, ha...
2    A cryptic message from Bond’s past sends him o...
3    Following the death of District Attorney Harve...
4    John Carter is a war-weary, former military ca...
5    The seemingly invincible Spider-Man goes up ag...
6    When the kingdom's most wanted-and most charmi...
7    When Tony Stark tries to jumpstart a dormant p...
8    As Harry begins his sixth year at Hogwarts, he...
9    Fearing the actions of a god-like Super Hero l...
Name: overview, dtype: object

In [56]:
tfidf = TfidfVectorizer(stop_words = 'english')

In [57]:
df['overview'] = df['overview'].fillna('')

In [58]:
matrix_tfidf = tfidf.fit_transform(df['overview'])
matrix_tfidf.shape

(4803, 20978)

In [59]:
cosine = linear_kernel(matrix_tfidf, matrix_tfidf)
cosine.shape

(4803, 4803)

In [60]:
index = pd.Series(df.index, index = df['title']).drop_duplicates()
index[:10]

title
Avatar                                      0
Pirates of the Caribbean: At World's End    1
Spectre                                     2
The Dark Knight Rises                       3
John Carter                                 4
Spider-Man 3                                5
Tangled                                     6
Avengers: Age of Ultron                     7
Harry Potter and the Half-Blood Prince      8
Batman v Superman: Dawn of Justice          9
dtype: int64

In [61]:
def recommendations(movie_title, cosine = cosine):
  indices = index[movie_title] # Gets index of movie that matches the title

  scores = list(enumerate(cosine[indices])) # Get the similarity (pairwise) of all movies with that movie
  scores = sorted(scores, key = lambda y: y[1], reverse = True) # Sort the movies based on the similarity scores

  scores = scores[1:11] # Scores of 10 most similar movies

  indices_movie = [a[0] for a in scores]

  return df['title'].iloc[indices_movie]

### **Plot Description Based Recommender**

In [62]:
recommendations('The Dark Knight Rises')

65                              The Dark Knight
299                              Batman Forever
428                              Batman Returns
1359                                     Batman
3854    Batman: The Dark Knight Returns, Part 2
119                               Batman Begins
2507                                  Slow Burn
9            Batman v Superman: Dawn of Justice
1181                                        JFK
210                              Batman & Robin
Name: title, dtype: object

In [63]:
recommendations('Tangled')

2309                           Out of Inferno
39                               TRON: Legacy
330     The Lord of the Rings: The Two Towers
4714                 An American in Hollywood
1470                                   Stolen
1484                        Snakes on a Plane
256                                 Allegiant
1984                The Thief and the Cobbler
986                             Your Highness
2155                          The World's End
Name: title, dtype: object

In [64]:
recommendations('Harry Potter and the Half-Blood Prince')

114           Harry Potter and the Goblet of Fire
113     Harry Potter and the Order of the Phoenix
191      Harry Potter and the Prisoner of Azkaban
276       Harry Potter and the Chamber of Secrets
501                             The Little Prince
4767                               Run, Hide, Die
197      Harry Potter and the Philosopher's Stone
4133                                        Plush
3015                                   Sinister 2
876                          Domestic Disturbance
Name: title, dtype: object

In [65]:
def clean_data(y):
  if isinstance(y, list):
    return [str.lower(i.replace(" ", "")) for i in y]
  else:
        if isinstance(y, str):
            return str.lower(y.replace(" ", ""))
        else:
            return ''

In [66]:
features = ['keywords', 'director_name', 'genres', 'actor_1_name']

for i in features:
    df[i] = df[i].apply(clean_data)

In [67]:
def join(x):
  return ' '.join(x['keywords']) + ' ' + ' '.join(x['genres']) + ' ' + x['director_name'] + ' ' + ' '.join(x['actor_1_name'])

In [68]:
df['join'] = df.apply(join, axis=1)

In [69]:
df[['join']].head(5)

Unnamed: 0,join
0,c u l t u r e c l a s h | f u t u r e | s p a ...
1,o c e a n | d r u g a b u s e | e x o t i c i ...
2,s p y | b a s e d o n n o v e l | s e c r e t ...
3,d c c o m i c s | c r i m e f i g h t e r | t ...
4,b a s e d o n n o v e l | m a r s | m e d a l ...


In [70]:
# Import CountVectorizer and create the count matrix
count = CountVectorizer(stop_words='english')
matrix_count = count.fit_transform(df['join'])

In [71]:
matrix_count.shape

(4803, 2469)

In [72]:
cosine_sim = cosine_similarity(matrix_count, matrix_count)

In [73]:
df = df.reset_index()
indices = pd.Series(df.index, index=df['title'])

### **Credits, Genres, and Keywords Based Recommender**

In [81]:
recommendations('The Dark Knight Rises', cosine_sim)

65                               The Dark Knight
95                                  Interstellar
96                                     Inception
119                                Batman Begins
1033                                    Insomnia
1196                                The Prestige
3573                                     Memento
0                                         Avatar
1       Pirates of the Caribbean: At World's End
2                                        Spectre
Name: title, dtype: object

In [82]:
recommendations('Tangled', cosine_sim)

0                                       Avatar
1     Pirates of the Caribbean: At World's End
2                                      Spectre
3                        The Dark Knight Rises
4                                  John Carter
5                                 Spider-Man 3
7                      Avengers: Age of Ultron
8       Harry Potter and the Half-Blood Prince
9           Batman v Superman: Dawn of Justice
10                            Superman Returns
Name: title, dtype: object

In [83]:
recommendations('Spider-Man 3', cosine_sim)

30                    Spider-Man 2
37      Oz: The Great and Powerful
159                     Spider-Man
964           For Love of the Game
1514        The Quick and the Dead
1598               Drag Me to Hell
2424                 A Simple Plan
2944              Army of Darkness
3841                  Evil Dead II
4595                 The Evil Dead
Name: title, dtype: object