In [2]:
import numpy as np
import pandas as pd
import difflib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Content-based movie recommender.
#
# Pipeline: load movies.csv, build one text blob per movie from a handful of
# descriptive columns, TF-IDF vectorize it, then recommend the movies whose
# vectors are closest (cosine similarity) to the movie whose 'keywords' entry
# best matches what the user typed.

# Number of recommendations to print.
TOP_N = 15

# Load the movies dataset
movies = pd.read_csv('movies.csv')

# Columns whose text is combined and vectorized for the similarity computation
selected_features = ['title', 'genres', 'tagline', 'cast', 'director']

# Replace null values with empty strings so string joining never sees NaN
for feature in selected_features:
    movies[feature] = movies[feature].fillna('')

# Combine the selected features into a single space-separated text column.
# Built from selected_features so the list above is the single source of truth.
movies['combined'] = movies[selected_features].agg(' '.join, axis=1)

# Convert the combined text to TF-IDF feature vectors
vectorizer = TfidfVectorizer()
feature_vectors = vectorizer.fit_transform(movies['combined'])

# Pairwise cosine similarity between all movies (row i = similarity of movie i
# to every movie). Kept as a full matrix so it stays available by this name.
similarity = cosine_similarity(feature_vectors)

# Prompt the user for keywords describing a movie they like
movie_keywords = input('Enter your favorite movie keywords: ').strip()

# Keyword strings per movie. Missing values become '' instead of the literal
# string 'nan', which would otherwise be offered as a match candidate.
keywords = movies['keywords'].fillna('').astype(str)
keyword_candidates = [kw for kw in keywords.unique() if kw]

# Find the keyword entries closest to the user's input. Blank input is
# rejected up front rather than being fuzzy-matched against anything.
close_match = (
    difflib.get_close_matches(movie_keywords, keyword_candidates)
    if movie_keywords
    else []
)
if not close_match:
    print(f"No close match found for '{movie_keywords}'. Please try different keywords.")
    # `exit()` is an interactive-shell helper injected by `site`/IPython and is
    # not meant for programs; raising SystemExit is the portable equivalent.
    raise SystemExit

# Select the best close match (get_close_matches returns best-first)
close_match = close_match[0]
print(f"Close match found: {close_match}")

# Positional row of the first movie whose keywords equal the matched string.
# Positional (not label) indexing is used because `similarity` rows and the
# enumerate() indices below are positions.
index_of_movie = int(np.flatnonzero((keywords == close_match).to_numpy())[0])

# Pair every movie position with its similarity to the matched movie
similarity_scores = list(enumerate(similarity[index_of_movie]))

# Sort movies by similarity score in descending order
sorted_similar = sorted(similarity_scores, key=lambda x: x[1], reverse=True)

# Print the top similar movies. The matched movie itself is normally first,
# since its similarity to itself is maximal.
print('Movies suggested for you:')
for i, (index, _score) in enumerate(sorted_similar[:TOP_N], 1):
    title = movies['title'].iloc[index]
    print(f"{i}. {title}")


Enter your favorite movie keywords: spy based on novel secret agent sequel mi6
Close match found: spy based on novel secret agent sequel mi6
Movies suggested for you:
1. Spectre
2. Skyfall
3. Irreversible
4. The Legend of Tarzan
5. Carnage
6. The Green Hornet
7. Coriolanus
8. The Sorcerer's Apprentice
9. Mission: Impossible - Ghost Protocol
10. Final Destination
11. Django Unchained
12. Action Jackson
13. Big Eyes
14. Water for Elephants
15. Harry Potter and the Order of the Phoenix
