In [1]:
import pandas as pd
import numpy as np

In [2]:
from sklearn.metrics.pairwise import cosine_similarity

In [3]:
data = pd.read_csv('DATA/ratings.csv', index_col=False)
data.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,1225734739
1,1,110,4.0,1225865086
2,1,158,4.0,1225733503
3,1,260,4.5,1225735204
4,1,356,5.0,1225735119


In [44]:
data1 = data.sample(n=200000, random_state=1)

In [45]:
user_item_matrix = data1.pivot_table(index='userId', columns='movieId', values='rating')

In [46]:
user_item_matrix_filled = user_item_matrix.fillna(0)

In [47]:
item_similarity = cosine_similarity(user_item_matrix_filled.T)

In [48]:
item_similarity_data = pd.DataFrame(item_similarity, index=user_item_matrix.columns, columns=user_item_matrix.columns)

In [49]:
data1.sample(n=5, random_state=1)

Unnamed: 0,userId,movieId,rating,timestamp
16626110,163257,3688,0.5,1115347413
19654650,192219,2302,3.5,1596770446
29092079,284319,1097,2.0,1211545251
22930618,223517,3262,4.5,1628965106
9357322,91980,357,2.0,898177103


------------------

In [50]:
target_user_id = 163257

In [51]:
target_user_ratings = user_item_matrix.loc[target_user_id]

In [52]:
rated_movies = target_user_ratings[target_user_ratings > 0].index

In [53]:
print(rated_movies)

Index([  89,  788, 1440, 1729, 2411, 2723, 3688, 3877, 3889, 4381, 4478, 5128,
       5613, 6156, 8939],
      dtype='int64', name='movieId')


In [54]:
movie_scores = {}

In [None]:
for movie in rated_movies:
    similar_movies = item_similarity_data[movie]
    
    # For each similar movie, check if it has been rated by the target user
    for similar_movie, similarity_score in similar_movies.items():
        if target_user_ratings.get(similar_movie, 0) > 0:  # The user hasn't rated this movie
            #print('1')
            if similar_movie not in movie_scores:
                #print('1')
                movie_scores[similar_movie] = 0
            movie_scores[similar_movie] += similarity_score  # Add the similarity score

In [64]:
# Check if movie_scores contains any valid data
if not movie_scores:
    print("No recommendations available.")
else:
    # Sort recommendations by similarity score
    recommended_movies = sorted(movie_scores.items(), key=lambda x: x[1], reverse=True)

    # Print the recommended movies
    print("Top 3 Recommended Movies for User", target_user_id)
    for movie, score in recommended_movies[:3]:
        print(f"MovieId: {movie}, Similarity Score: {score}")

Top 3 Recommended Movies for User 163257
MovieId: 4381, Similarity Score: 5.247921073441004
MovieId: 4478, Similarity Score: 4.53153403929925
MovieId: 8939, Similarity Score: 4.304519742682868
