In [1]:
!pip install pandas numpy scikit-learn matplotlib




In [2]:
import pandas as pd
import numpy as np
import ast

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


In [4]:
# Load the movie metadata and the credits table.
# NOTE: on_bad_lines="skip" silently drops any row the parser cannot read, so
# the loaded frames may be shorter than the files on disk.
movies = pd.read_csv(
    "/content/tmdb_5000_movies.csv",
    engine="python",
    encoding="utf-8",
    on_bad_lines="skip"
)

credits = pd.read_csv(
    "/content/tmdb_5000_credits.csv",
    engine="python",
    encoding="utf-8",
    on_bad_lines="skip"
)

# Join on the movie id instead of on `title`: titles are not a unique key, so a
# title join pairs every same-named film with every same-named credits row,
# duplicating rows and attaching the wrong cast/crew to some of them.
# Assumes the movies file names its key `id` and the credits file `movie_id`
# (the usual TMDB 5000 layout) -- TODO confirm against the actual CSV headers.
# The credits copy of `title` is dropped so the result keeps one `title` column.
movies = movies.merge(
    credits.drop(columns="title"),
    left_on="id",
    right_on="movie_id",
)


In [5]:
# Keep only the columns used to build the recommender and replace missing
# values with empty strings so the later string concatenation never sees NaN.
# fillna is chained (not inplace) so it operates on a fresh frame instead of
# mutating a column subset, which can raise SettingWithCopyWarning.
movies = movies[
    ['movie_id', 'title', 'overview', 'genres', 'keywords', 'cast', 'crew']
].fillna('')


In [6]:
def extract_names(text):
    """Join the ``name`` of every entry in a stringified list of dicts.

    Args:
        text: String containing a Python-literal list of dicts, each with a
            ``'name'`` key. ``None`` or a blank string is treated as an
            empty list.

    Returns:
        The names separated by single spaces, or ``""`` when there are none.

    Raises:
        ValueError, SyntaxError: propagated from ``ast.literal_eval`` when
            ``text`` is non-blank but not a valid literal.
        KeyError: if an entry has no ``'name'`` key.
    """
    # Missing cells arrive as '' after the NaN fill; literal_eval('') raises.
    if text is None or (isinstance(text, str) and not text.strip()):
        return ""
    return " ".join(entry['name'] for entry in ast.literal_eval(text))

def extract_cast(text, limit=3):
    """Join the ``name`` of the first ``limit`` entries of a stringified cast list.

    Args:
        text: String containing a Python-literal list of dicts, each with a
            ``'name'`` key. ``None`` or a blank string is treated as an
            empty list.
        limit: Maximum number of leading entries to keep (default 3).

    Returns:
        The selected names separated by single spaces, or ``""`` when there
        are none.

    Raises:
        ValueError, SyntaxError: propagated from ``ast.literal_eval`` when
            ``text`` is non-blank but not a valid literal.
        KeyError: if a selected entry has no ``'name'`` key.
    """
    # Missing cells arrive as '' after the NaN fill; literal_eval('') raises.
    if text is None or (isinstance(text, str) and not text.strip()):
        return ""
    return " ".join(member['name'] for member in ast.literal_eval(text)[:limit])

def extract_director(text):
    """Return the name of the first crew entry whose ``job`` is ``'Director'``.

    Args:
        text: String containing a Python-literal list of dicts describing
            crew members. ``None`` or a blank string is treated as an empty
            list.

    Returns:
        The ``'name'`` of the first entry with ``job == 'Director'``, or
        ``""`` when no such entry exists.

    Raises:
        ValueError, SyntaxError: propagated from ``ast.literal_eval`` when
            ``text`` is non-blank but not a valid literal.
        KeyError: if the matching entry has no ``'name'`` key.
    """
    # Missing cells arrive as '' after the NaN fill; literal_eval('') raises.
    if text is None or (isinstance(text, str) and not text.strip()):
        return ""
    # .get() lets entries without a 'job' key be skipped instead of raising.
    return next(
        (
            member['name']
            for member in ast.literal_eval(text)
            if member.get('job') == 'Director'
        ),
        "",
    )


In [7]:
# Replace each raw metadata column with its flattened text form, pairing every
# column with the extractor that parses it.
for column, extractor in (
    ('genres', extract_names),
    ('keywords', extract_names),
    ('cast', extract_cast),
    ('crew', extract_director),
):
    movies[column] = movies[column].apply(extractor)


In [8]:
# Build one space-separated text blob per movie from the descriptive columns,
# concatenated left to right in the order listed here.
tag_columns = ['overview', 'genres', 'keywords', 'cast', 'crew']

tags = movies[tag_columns[0]]
for column in tag_columns[1:]:
    tags = tags + " " + movies[column]

# Only the identifier, the title and the combined text are needed from here on.
movies = movies.assign(tags=tags)[['movie_id', 'title', 'tags']]


In [9]:
# Turn every movie's tag text into a TF-IDF vector, ignoring English stop
# words and capping the vocabulary at 5000 terms.
vectorizer_options = {'stop_words': 'english', 'max_features': 5000}
tfidf = TfidfVectorizer(**vectorizer_options)
vectors = tfidf.fit_transform(movies['tags'])


In [10]:
# Pairwise cosine similarity between every pair of rows in `vectors`;
# row i is later read as the scores of movie i against all movies.
# NOTE(review): the result has one entry per pair of movies, so memory grows
# quadratically with the number of rows -- confirm this is acceptable at scale.
similarity = cosine_similarity(vectors)


In [11]:
def recommend(movie_name, top_n=5):
    """Return the titles of the movies most similar to ``movie_name``.

    Reads the module-level ``movies`` frame and ``similarity`` matrix; row
    *i* of ``similarity`` must correspond to the *i*-th row of ``movies``.

    Args:
        movie_name: Exact title to look up (case-sensitive). When several
            rows share the title, the first one is used.
        top_n: Maximum number of recommendations to return (default 5).
            Values <= 0 yield an empty list.

    Returns:
        A list of up to ``top_n`` titles ordered from most to least similar,
        never including the queried row itself. If the title is unknown, a
        one-element list holding an error message is returned instead.
    """
    # Work with row *positions*, not index labels: `similarity` is addressed
    # positionally, and the two only coincide for a default RangeIndex.
    title_matches = np.flatnonzero((movies['title'] == movie_name).to_numpy())
    if title_matches.size == 0:
        return ["Movie not found. Please check spelling."]
    if top_n <= 0:
        return []

    query_pos = int(title_matches[0])
    scores = np.asarray(similarity[query_pos])

    # Stable descending sort keeps tied scores in their original row order.
    ranked = np.argsort(-scores, kind="stable")
    # Drop the queried row explicitly instead of assuming it sorts first;
    # another row with an identical score could otherwise displace it.
    ranked = ranked[ranked != query_pos][:top_n]

    return movies['title'].iloc[ranked].tolist()


In [12]:
# Smoke test: show the default five recommendations for a known title.
recommend("Avatar")


['Alien³',
 'Mission to Mars',
 'Lost in Space',
 'Star Trek Into Darkness',
 'Treasure Planet']

In [13]:
# Interactive lookup: prompt for a title and show its recommendations.
# The title is matched exactly as typed by recommend(), so casing and
# surrounding whitespace matter.
movie = input("Enter movie name: ")
recommend(movie)


Enter movie name: Thor


['Thor: The Dark World',
 'Avengers: Age of Ultron',
 'The Avengers',
 'Captain America: Civil War',
 'Iron Man 3']