In [40]:
import pandas as pd
import numpy as np

In [41]:
# Load the user x movie ratings table; the User_ID column becomes the row index.
df = pd.read_csv('Users-Movies Matrix.csv', index_col='User_ID')
# Plain NumPy array of the table's cells (no row or column labels).
ratings = df.to_numpy()

In [42]:
def matrix_factorization(R, P, Q, k=5, steps=100, alpha=0.02, beta=0.01, threshold=0.01):
    """Factorise a ratings matrix into user and item latent factors with SGD.

    Only entries of ``R`` that are strictly greater than zero are treated as
    observed ratings; all other entries are skipped both when updating the
    factors and when evaluating the objective.

    Parameters
    ----------
    R : array-like, shape (n_users, n_items)
        Ratings matrix.
    P : array-like, shape (n_users, n_factors)
        Initial user factors. The caller's array is NOT modified; a float
        copy is trained and returned.
    Q : array-like, shape (n_items, n_factors)
        Initial item factors. The caller's array is NOT modified either.
    k : int
        Number of leading latent features that are updated and regularised.
        Must satisfy ``0 <= k <= n_factors``.
    steps : int
        Maximum number of passes over the observed ratings.
    alpha : float
        Learning rate.
    beta : float
        Regularisation strength.
    threshold : float
        Training stops early once the objective changes by less than this
        amount between two consecutive passes.

    Returns
    -------
    tuple of numpy.ndarray
        ``(P, Q)`` with shapes ``(n_users, n_factors)`` and
        ``(n_items, n_factors)``.

    Raises
    ------
    ValueError
        If ``k`` is negative or larger than the number of factor columns.
    """
    R = np.asarray(R)
    # Train on private float copies so the caller's initial factors survive and
    # re-running the training cell always starts from the same initialisation.
    P = np.array(P, dtype=float)
    Q = np.array(Q, dtype=float).T  # work with Q as (n_factors, n_items)

    if not 0 <= k <= min(P.shape[1], Q.shape[0]):
        raise ValueError(
            f"k={k} must be between 0 and the number of latent features "
            f"({min(P.shape[1], Q.shape[0])})"
        )

    # Locate the observed ratings once; np.nonzero yields them in row-major
    # order, which matches the visiting order of a nested i/j loop.
    observed = R > 0
    obs_rows, obs_cols = np.nonzero(observed)
    # How many observed ratings each user / item has; used to weight the
    # regularisation term, which is added once per observed rating.
    n_per_user = observed.sum(axis=1)
    n_per_item = observed.sum(axis=0)

    prev_e = float('inf')
    for step in range(steps):
        for i, j in zip(obs_rows, obs_cols):
            eij = R[i, j] - np.dot(P[i, :], Q[:, j])
            # Views into the factor matrices: in-place updates write through.
            p_feat = P[i, :k]
            q_feat = Q[:k, j]
            p_feat += alpha * (2 * eij * q_feat - beta * p_feat)
            # The item step deliberately sees the user factors updated just
            # above, exactly as the per-feature loop it replaces did.
            q_feat += alpha * (2 * eij * p_feat - beta * q_feat)

        # Regularised squared error over the observed ratings only.
        residual = np.where(observed, R - np.dot(P, Q), 0.0)
        penalty = (
            np.dot(n_per_user, np.sum(P[:, :k] ** 2, axis=1))
            + np.dot(n_per_item, np.sum(Q[:k, :] ** 2, axis=0))
        )
        e = np.sum(residual ** 2) + (beta / 2) * penalty

        # Check for early stopping
        if step > 0 and abs(prev_e - e) < threshold:
            print(f"Stopping early at step {step}")
            break
        prev_e = e

    return P, Q.T


In [43]:
# Disabled cell: the whole setup below is wrapped in a string literal, so none of
# it executes. The cell merely evaluates to that string, which the notebook echoes
# back as its output.
'''
n_users = ratings.shape[0]
n_movies = ratings.shape[1]
k = 20  # Number of latent features

np.random.seed(0)
user_matrix = np.random.normal(scale=1./k, size=(n_users, k))
movie_matrix = np.random.normal(scale=1./k, size=(n_movies, k))
'''


'\nn_users = ratings.shape[0]\nn_movies = ratings.shape[1]\nk = 20  # Number of latent features\n\nnp.random.seed(0)\nuser_matrix = np.random.normal(scale=1./k, size=(n_users, k))\nmovie_matrix = np.random.normal(scale=1./k, size=(n_movies, k))\n'

In [44]:
# Work on the first 10 users only, as a small test subset.
df_subset = df.iloc[:10]
ratings_subset = df_subset.values

# Rows of the subset are users, columns are movies.
n_users_subset, n_movies = ratings_subset.shape
k = 5  # Number of latent features to extract

# Seeded Gaussian initialisation (standard deviation 1/k) of both factor matrices.
np.random.seed(0)
user_matrix_subset = np.random.normal(scale=1.0 / k, size=(n_users_subset, k))
movie_matrix_subset = np.random.normal(scale=1.0 / k, size=(n_movies, k))


In [45]:
# Train user and movie factors on the subset; remaining hyper-parameters keep their defaults.
P_subset, Q_subset = matrix_factorization(
    R=ratings_subset,
    P=user_matrix_subset,
    Q=movie_matrix_subset,
    k=k,
)


In [46]:
# Reconstruct the full users x movies prediction matrix from the learned factors.
predicted_ratings_subset = P_subset @ Q_subset.T


In [47]:
movie_titles = df.columns.tolist()
user_id = 6
# predicted_ratings_subset has one row per user in df_subset, so the row position
# must come from df_subset's index. Looking it up in the full df index fails with
# an unhelpful IndexError for users outside the subset, and would silently select
# the wrong row if the subset were ever not a prefix of df.
user_index = df_subset.index.get_loc(user_id)
user_predicted_ratings = predicted_ratings_subset[user_index]

In [48]:
# Pair every movie title with this user's predicted rating.
movies_with_predictions = [
    (title, score) for title, score in zip(movie_titles, user_predicted_ratings)
]
# Rank the pairs from highest to lowest predicted rating.
sorted_movies_with_predictions = sorted(
    movies_with_predictions,
    key=lambda pair: pair[1],
    reverse=True,
)
sorted_movies_with_predictions

[('Beverly Hills Cop', 6.0613805000278),
 ('Training Day', 5.5781938695078175),
 ('U.S. Marshals', 5.042218141632069),
 ('Sixteen Candles', 5.001449160299817),
 ('Chocolat', 4.941720928671315),
 ('Joy Ride', 4.903885252620871),
 ('Three Musketeers', 4.888544294443255),
 ('Harold and Kumar Go to White Castle', 4.865901935976154),
 ('Dawn of the Dead', 4.856367991298006),
 ('The Missing', 4.841239169187098),
 ('Shanghai Noon', 4.809981803954141),
 ('Ray', 4.803864434618918),
 ("Charlotte's Web", 4.802507401458694),
 ('Hercules', 4.731019843020174),
 ('Reservoir Dogs', 4.729777786533682),
 ('The Dead Zone: Season 2', 4.715258152509358),
 ('Rookie of the Year', 4.687948829341757),
 ('The Bourne Supremacy', 4.640423116778324),
 ('The Longest Yard', 4.607753677507673),
 ('Speed', 4.585863518989189),
 ("The Wizard of Oz: Collector's Edition", 4.576208211501693),
 ('Braveheart', 4.574362211356598),
 ('Free Willy', 4.5682257293103685),
 ('Bend It Like Beckham', 4.562757420310416),
 ('Ever After

In [49]:
# Ratings this user actually gave, in the same column order as movie_titles.
user_original_ratings = df.loc[user_id].values

# Title -> original rating lookup, built once so the filter below does not rescan
# movie_titles with list.index() for every candidate.
# Assumes column titles are unique (pandas.read_csv de-duplicates header names by default).
original_rating_by_title = dict(zip(movie_titles, user_original_ratings))

# Keep only movies the user has not rated (stored as 0), preserving the
# descending predicted-rating order of the input list.
unrated_movies_with_predictions = [
    (movie, rating)
    for movie, rating in sorted_movies_with_predictions
    if original_rating_by_title[movie] == 0
]


In [50]:
N = 10  # Number of top recommendations to extract
# The candidate list is already ordered by predicted rating, highest first,
# so its first N entries are the N strongest recommendations.
top_recommendations = unrated_movies_with_predictions[:N]

# Header line, then one line per recommendation with the rating shown to 2 decimals.
print(f"Top {N} movie recommendations for User ID {user_id}:")
for movie, predicted_rating in top_recommendations:
    print(f"Movie: {movie}, Predicted Rating: {predicted_rating:.2f}")


Top 10 movie recommendations for User ID 6:
Movie: Beverly Hills Cop, Predicted Rating: 6.06
Movie: Training Day, Predicted Rating: 5.58
Movie: U.S. Marshals, Predicted Rating: 5.04
Movie: Sixteen Candles, Predicted Rating: 5.00
Movie: Chocolat, Predicted Rating: 4.94
Movie: Joy Ride, Predicted Rating: 4.90
Movie: Harold and Kumar Go to White Castle, Predicted Rating: 4.87
Movie: Dawn of the Dead, Predicted Rating: 4.86
Movie: The Missing, Predicted Rating: 4.84
Movie: Shanghai Noon, Predicted Rating: 4.81
