In [4]:
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer 
from sklearn.metrics.pairwise import cosine_similarity
import difflib

# Read the movie metadata table from the CSV file in the working directory.
movies_dataset = pd.read_csv('movies.csv')

# Per-column count of missing values; the result is not stored, so it is only
# visible when this statement is evaluated interactively.
movies_dataset.isnull().sum()
# Remove the 'homepage' column from the table in place.
movies_dataset.drop(columns='homepage', inplace=True)

# Text columns that are merged into a single description per movie.
selected_feature = ['genres', 'keywords', 'tagline', 'cast', 'director']
# Replace missing text with '' so the string concatenation below never
# propagates NaN into the combined description.
movies_dataset[selected_feature] = movies_dataset[selected_feature].fillna('')

# Join the selected columns, in list order, with single spaces between them.
combined_features = movies_dataset[selected_feature[0]]
for column_name in selected_feature[1:]:
    combined_features = combined_features + ' ' + movies_dataset[column_name]

# Turn each combined description into a TF-IDF weighted term vector.
vectorizer = TfidfVectorizer()
feature_vector = vectorizer.fit_transform(combined_features)

# Pairwise cosine similarity between every pair of movie vectors; row i holds
# the scores of the movie in row i of the table against all movies.
similarity = cosine_similarity(feature_vector)

# How many titles to print; the matched movie itself normally ranks first
# because its similarity with itself is the highest score in its row.
max_suggestions = 29

# Ask the user for a (possibly misspelled) movie title.
movies_names = input("Enter the movie name: ")

# Titles in table row order, so position i in this list lines up with
# row/column i of the `similarity` matrix.
list_of_titles = movies_dataset['title'].tolist()

# difflib returns the closest titles best-first, or an empty list when nothing
# is similar enough to the input.
find_class_match = difflib.get_close_matches(movies_names, list_of_titles)

if not find_class_match:
    # Without this guard an unrecognisable input crashed with IndexError.
    print('No movie title close to', repr(movies_names), 'was found.')
else:
    close_match = find_class_match[0]

    # Use the row position of the first matching title. The similarity matrix
    # is positional, so this avoids depending on an 'index' column in the CSV.
    index_of_the_movie = list_of_titles.index(close_match)

    # Pair every movie position with its similarity to the chosen movie.
    similarity_score = list(enumerate(similarity[index_of_the_movie]))

    # Highest similarity first; sorted() is stable, so ties keep table order.
    sorted_similar_movies = sorted(similarity_score, key=lambda x: x[1], reverse=True)

    print('movies suggested for you: ')
    # Only the leading entries are visited, and titles come from the list by
    # position instead of a full-table boolean scan per movie.
    for i, (index, _score) in enumerate(sorted_similar_movies[:max_suggestions], start=1):
        print(i, '.', list_of_titles[index])

Enter the movie name: avtar
movies suggested for you: 
1 . Avatar
2 . Alien
3 . Aliens
4 . Guardians of the Galaxy
5 . Star Trek Beyond
6 . Star Trek Into Darkness
7 . Galaxy Quest
8 . Alien³
9 . Cargo
10 . Trekkies
11 . Gravity
12 . Moonraker
13 . Jason X
14 . Pocahontas
15 . Space Cowboys
16 . The Helix... Loaded
17 . Lockout
18 . Event Horizon
19 . Space Dogs
20 . Machete Kills
21 . Gettysburg
22 . Clash of the Titans
23 . Star Wars: Clone Wars: Volume 1
24 . The Right Stuff
25 . Terminator Salvation
26 . The Astronaut's Wife
27 . Planet of the Apes
28 . Star Trek
29 . Wing Commander
