In [1]:
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

import warnings
warnings.filterwarnings('ignore')
pd.set_option('display.max_columns', None)

In [2]:
movies = pd.read_csv('../data/movies.dat', sep='::', header=None, names=['movie_id', 'title', 'genres'], encoding='ISO-8859-1')
ratings = pd.read_csv('../data/ratings.dat', sep='::', header=None, names=['user_id', 'movie_id', 'rating', 'timestamp'], encoding='ISO-8859-1')
users = pd.read_csv('../data/users.dat', sep='::', header=None, names=['user_id', 'gender', 'age', 'occupation', 'zip_code'], encoding='ISO-8859-1')

In [3]:
# Define the TfidfVectorizer
tfidf = TfidfVectorizer(stop_words='english')

In [4]:
# Fit and transform the genres column of the movies dataframe
movies_genres_tfidf = tfidf.fit_transform(movies['genres'])

In [5]:
# Compute the user preference vectors
user_preferences = ratings[(ratings['user_id'] == 1) & (ratings['rating'] >= 4)]
user_preferences = user_preferences.merge(movies, on='movie_id')
user_preferences_genres_tfidf = tfidf.transform(user_preferences['genres'])
user_preference_vector = user_preferences_genres_tfidf.sum(axis=0)

In [6]:
# Convert the user preference vector to a numpy array
user_preference_vector = np.array(user_preference_vector).reshape(-1)

In [7]:
# Compute the cosine similarity between the user preference vector and the movie feature vectors
similarity_scores = cosine_similarity(user_preference_vector.reshape(1, -1), movies_genres_tfidf)

In [8]:
# Get the row indices of the top n movies with the highest similarity scores
n = 10
top_movie_indices = similarity_scores.argsort()[0][-n:][::-1]

In [9]:
# Get the corresponding movie ids and titles
top_movie_ids = [movies.iloc[i]['movie_id'] for i in top_movie_indices]
top_movie_titles = [movies.iloc[i]['title'] for i in top_movie_indices]

print (top_movie_ids)
print (top_movie_titles)

[1812, 34, 1014, 2078, 2102, 588, 2081, 2080, 241, 1026]
['Wide Awake (1998)', 'Babe (1995)', 'Pollyanna (1960)', 'Jungle Book, The (1967)', 'Steamboat Willie (1940)', 'Aladdin (1992)', 'Little Mermaid, The (1989)', 'Lady and the Tramp (1955)', 'Fluke (1995)', 'So Dear to My Heart (1949)']
