In [120]:
import pandas as pd
df = pd.read_csv('movies.csv')
df['overview'].head(10)
df.sample(frac=0.8, random_state=10)
df['overview'] = df['overview'].fillna('')
df.columns

Index(['Unnamed: 0', 'budget', 'genres', 'homepage', 'id', 'keywords',
       'original_language', 'original_title', 'overview', 'popularity',
       'production_companies', 'production_countries', 'release_date',
       'revenue', 'runtime', 'spoken_languages', 'status', 'tagline', 'title',
       'vote_average', 'vote_count', 'title_x', 'cast', 'crew'],
      dtype='object')

In [68]:
from sklearn.feature_extraction.text import TfidfVectorizer
tfidf = TfidfVectorizer(min_df=3, ngram_range=(1,3),stop_words='english')
tfidfp = tfidf.fit_transform(df['overview'])

In [66]:
from sklearn.feature_extraction.text import CountVectorizer
countv = CountVectorizer(min_df=3,ngram_range=(1,3),stop_words='english')
countvp = countv.fit_transform(df['overview'])

In [70]:
from sklearn.metrics.pairwise import cosine_similarity
cos = cosine_similarity(tfidfp, tfidfp)
cos1 = cosine_similarity(countvp,countvp)

In [114]:
from sklearn.metrics.pairwise import cosine_distances
cos2 = cosine_distances(tfidfp,tfidfp)
cos3=cosine_distances(countvp,countvp)

In [71]:
index = pd.Series(df.index, index = df['original_title']).drop_duplicates()
index

original_title
Avatar                                         0
Pirates of the Caribbean: At World's End       1
Spectre                                        2
The Dark Knight Rises                          3
John Carter                                    4
                                            ... 
El Mariachi                                 4798
Newlyweds                                   4799
Signed, Sealed, Delivered                   4800
Shanghai Calling                            4801
My Date with Drew                           4802
Length: 4803, dtype: int64

In [103]:
def get_recommendations(title, cos_similarity):
    idx = index[title]
    s=cos_similarity[0].tolist()
    d=sorted(list(enumerate(cos[idx])),key=lambda x:x[1],reverse=1)[:11]
    return df[['original_title','runtime']].iloc[[i[0] for i in d][1:]].set_index("original_title")

In [104]:
get_recommendations("Avatar",cos_similarity=cos)

Unnamed: 0_level_0,runtime
original_title,Unnamed: 1_level_1
Obitaemyy Ostrov,115.0
The Matrix,136.0
Apollo 18,86.0
The American,104.0
Supernova,91.0
Tears of the Sun,121.0
Beowulf,115.0
The Adventures of Pluto Nash,95.0
Semi-Pro,91.0
Ransom,117.0


In [105]:
get_recommendations('Avatar',cos_similarity=cos1)

Unnamed: 0_level_0,runtime
original_title,Unnamed: 1_level_1
Obitaemyy Ostrov,115.0
The Matrix,136.0
Apollo 18,86.0
The American,104.0
Supernova,91.0
Tears of the Sun,121.0
Beowulf,115.0
The Adventures of Pluto Nash,95.0
Semi-Pro,91.0
Ransom,117.0


In [106]:
get_recommendations('Newlyweds',cos)

Unnamed: 0_level_0,runtime
original_title,Unnamed: 1_level_1
Ted 2,115.0
"You, Me and Dupree",108.0
Our Family Wedding,103.0
Something Wicked,95.0
Bride Wars,89.0
Just Married,95.0
The Secret Life of Pets,87.0
Bullet to the Head,92.0
कभी अलविदा ना कहना,193.0
Fantastic Four,100.0


In [107]:
get_recommendations('The Hobbit: The Desolation of Smaug',cos)

Unnamed: 0_level_0,runtime
original_title,Unnamed: 1_level_1
The Hobbit: An Unexpected Journey,169.0
The Hobbit: The Battle of the Five Armies,144.0
The Lord of the Rings: The Fellowship of the Ring,178.0
The Sisterhood of the Traveling Pants 2,117.0
The Lord of the Rings: The Return of the King,201.0
Without a Paddle,95.0
How to Train Your Dragon 2,102.0
Old Joy,73.0
Dragon Nest: Warriors' Dawn,88.0
George and the Dragon,93.0


In [109]:
get_recommendations('The Amazing Spider-Man',cos1)

Unnamed: 0_level_0,runtime
original_title,Unnamed: 1_level_1
The Amazing Spider-Man 2,142.0
Spider-Man 3,139.0
Spider-Man 2,127.0
Spider-Man,121.0
Forgetting Sarah Marshall,111.0
Hook,144.0
Due Date,95.0
Trucker,90.0
Pan,111.0
"I Love You, Man",105.0


In [118]:
get_recommendations("Hook",cos)

Unnamed: 0_level_0,runtime
original_title,Unnamed: 1_level_1
Finding Neverland,106.0
Pan,111.0
Return to Never Land,72.0
Forgetting Sarah Marshall,111.0
The Amazing Spider-Man,136.0
The Amazing Spider-Man 2,142.0
Bringing Down the House,105.0
Due Date,95.0
Trucker,90.0
Lost Souls,97.0


In [119]:
get_recommendations("Due Date",cos3)

Unnamed: 0_level_0,runtime
original_title,Unnamed: 1_level_1
Forgetting Sarah Marshall,111.0
The Amazing Spider-Man 2,142.0
The Amazing Spider-Man,136.0
Hook,144.0
Les herbes folles,104.0
Trucker,90.0
The Sweetest Thing,84.0
Bringing Down the House,105.0
"I Love You, Man",105.0
It Happened One Night,105.0
