# In [None]:
# Import libraries
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Load the movie metadata. The path is relative to the current working
# directory, so run this from the project root.
movies = pd.read_csv("dataset/tmdb_5000_movies.csv")

# Keep only the columns used to describe a movie and replace missing values
# with empty strings so the text fields can be concatenated below.
# fillna() is chained (rather than applied in place to the column subset) so
# that `movies` is an independent DataFrame; adding a column to it afterwards
# does not trigger pandas' SettingWithCopyWarning.
movies = movies[['title', 'overview', 'genres', 'keywords']].fillna('')

# Build one text document per movie from its descriptive fields.
# NOTE(review): genres/keywords are used exactly as stored in the CSV. If they
# hold JSON-encoded lists, structural tokens (e.g. "id", "name") will end up
# in the TF-IDF vocabulary -- confirm, and extract the names first if so.
movies['combined_features'] = (
    movies['overview'] + ' ' + movies['genres'] + ' ' + movies['keywords']
)

# Turn each document into a TF-IDF vector, ignoring English stop words.
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(movies['combined_features'])

# Pairwise cosine similarity between all movies: row i / column j holds the
# similarity between the movies at positions i and j of `movies`.
similarity = cosine_similarity(tfidf_matrix, tfidf_matrix)

# Recommendation function
def recommend(movie_name, top_n=5):
    """Print the titles of the movies most similar to ``movie_name``.

    The first row of the module-level ``movies`` frame whose ``title`` equals
    ``movie_name`` exactly is used as the query. Every other row is ranked by
    its score in that row of the module-level ``similarity`` matrix, highest
    first, and the best ``top_n`` titles are printed one per line.

    Args:
        movie_name: Exact title to look up in ``movies['title']``.
        top_n: Maximum number of recommendations to print. Defaults to 5;
            values below zero are treated as zero.

    Returns:
        None. Results are printed. If no title matches, "Movie not found!"
        is printed instead.
    """
    is_match = (movies['title'] == movie_name).to_numpy()
    if not is_match.any():
        print("Movie not found!")
        return

    # Row *position* of the first match. `similarity` is indexed by position,
    # so an index label would only be correct for a default RangeIndex.
    idx = int(is_match.argmax())
    scores = similarity[idx]

    # Drop the query movie by position instead of assuming it sorts first:
    # a row with identical (or empty) features can tie with it, and the
    # stable sort would then rank that row ahead of the query itself.
    candidates = (pos for pos in range(len(scores)) if pos != idx)
    ranked = sorted(candidates, key=lambda pos: scores[pos], reverse=True)

    print(f"\nMovies similar to '{movie_name}':\n")
    titles = movies['title']
    for pos in ranked[:max(top_n, 0)]:
        print(titles.iloc[pos])

# Example usage: print the recommendations for the title 'Avatar'.
# This is a top-level call, so it runs whenever the script/cell is executed.
recommend('Avatar')
