In [126]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler


# Load the data
movies = pd.read_csv("movies.csv")
ratings = pd.read_csv("ratings.csv")

# Filter the data
filtered_ratings = ratings[ratings["userId"].isin(range(1, 201))]
filtered_movies = movies[movies["movieId"].isin(filtered_ratings["movieId"].unique())].head(200)
filtered_ratings = filtered_ratings[filtered_ratings["movieId"].isin(filtered_movies["movieId"])]

# Create a user-movie matrix
user_movie_matrix = filtered_ratings.pivot_table(index="userId", columns="movieId", values="rating").fillna(0)

# Standardize the user-movie matrix
scaler = StandardScaler()
user_movie_matrix_std = scaler.fit_transform(user_movie_matrix)


# Calculate the similarity matrix
similarity_matrix = cosine_similarity(user_movie_matrix.T)

# Find the top 10 similar movies for the given movie IDs
def get_similar_movies(movie_id, n=11):
    movie_index = filtered_movies[filtered_movies["movieId"] == movie_id].index[0]
    movie_similarities = similarity_matrix[movie_index]
    top_n_movie_indices = movie_similarities.argsort()[-n:][::-1]
    return filtered_movies.iloc[top_n_movie_indices]

In [129]:
print(get_similar_movies(1))

     movieId                                      title  \
0          1                           Toy Story (1995)   
224      260  Star Wars: Episode IV - A New Hope (1977)   
97       110                          Braveheart (1995)   
123      150                           Apollo 13 (1995)   
46        50                 Usual Suspects, The (1995)   
92       104                       Happy Gilmore (1996)   
138      165          Die Hard: With a Vengeance (1995)   
43        47                Seven (a.k.a. Se7en) (1995)   
32        34                                Babe (1995)   
176      208                          Waterworld (1995)   
126      153                      Batman Forever (1995)   

                                          genres  
0    Adventure|Animation|Children|Comedy|Fantasy  
224                      Action|Adventure|Sci-Fi  
97                              Action|Drama|War  
123                         Adventure|Drama|IMAX  
46                        Crime|Myst

In [130]:
print(get_similar_movies(4))

     movieId                                           title  \
3          4                        Waiting to Exhale (1995)   
100      113                         Before and After (1996)   
153      181  Mighty Morphin Power Rangers: The Movie (1995)   
151      179                                 Mad Love (1995)   
206      240                                 Hideaway (1995)   
209      243                                    Gordy (1995)   
187      219                                Cure, The (1995)   
182      214           Before the Rain (Pred dozhdot) (1994)   
180      212                              Bushwhacked (1995)   
214      250             Heavyweights (Heavy Weights) (1995)   
177      209                       White Man's Burden (1995)   

                             genres  
3              Comedy|Drama|Romance  
100                   Drama|Mystery  
153                 Action|Children  
151                   Drama|Romance  
206                        Thriller  
209

In [137]:
def recommend_movies(user_id, user_movie_matrix, similarity_matrix, filtered_movies, n=3):
    user_ratings = user_movie_matrix.loc[user_id]
    #drop rated movies as i think we are getting the movies which he has probably watched
    rated_movies = user_ratings[user_ratings != 0].index
    updated_user_movie_matrix = user_movie_matrix.drop(rated_movies, axis=1)
    updated_similarity_matrix = updated_user_movie_matrix.T.corr()
    movie_scores = []
    for movie in rated_movies:
        try:
            movie_index = user_movie_matrix.columns.get_loc(movie)
            # the weighted scores are calculated by multiplying the similarity scores with the user's ratings for each movie.
            weighted_scores = updated_similarity_matrix[movie_index] * user_ratings
            # The score for the movie is then calculated by taking the weighted average of the user's ratings for the similar movies
            score = sum(weighted_scores) / sum(abs(updated_similarity_matrix[movie_index]))
            movie_scores.append((movie, score))
        except KeyError:
            
            continue

    movie_scores.sort(key=lambda x: x[1], reverse=True)
    return [movie_score[0] for movie_score in movie_scores][:n]


In [138]:
recommended_movies = recommend_movies(user_id=1, user_movie_matrix=user_movie_matrix, similarity_matrix=similarity_matrix,filtered_movies=filtered_movies, n=3)

for movie_id in recommended_movies:
    movie_info = filtered_movies[filtered_movies["movieId"] == movie_id]
    movie_title = movie_info["title"].values[0]
    print(movie_title)

Heat (1995)
Seven (a.k.a. Se7en) (1995)
Usual Suspects, The (1995)
