In [5]:
import pandas as pd
import numpy as np

movie_genres = pd.read_csv('movie_reviews/movie_genres.csv')
user_reviews = pd.read_csv('movie_reviews/user_reviews.csv')

In [6]:
movie_genres

Unnamed: 0.1,Unnamed: 0,movie_title,genre_action,genre_adventure,genre_animation,genre_biography,genre_comedy,genre_crime,genre_documentary,genre_drama,...,genre_mystery,genre_news,genre_reality-tv,genre_romance,genre_sci-fi,genre_short,genre_sport,genre_thriller,genre_war,genre_western
0,0,The Net,1,0,0,0,0,1,0,1,...,1,0,0,0,0,0,0,1,0,0
1,1,Happily N'Ever After,0,1,1,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,2,Tomorrowland,1,1,0,0,0,0,0,0,...,1,0,0,0,1,0,0,0,0,0
3,3,American Hero,1,0,0,0,1,0,0,1,...,0,0,0,0,1,0,0,0,0,0
4,4,Das Boot,0,1,0,0,0,0,0,1,...,0,0,0,0,0,0,0,1,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1995,1995,Big Fish,0,1,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
1996,1996,Get Real,0,0,0,0,1,0,0,1,...,0,0,0,1,0,0,0,0,0,0
1997,1997,Trading Places,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1998,1998,DOA: Dead or Alive,1,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [7]:
user_reviews.head()

Unnamed: 0.1,Unnamed: 0,User,The Net,Happily N'Ever After,Tomorrowland,American Hero,Das Boot,Final Destination 3,Licence to Kill,The Hundred-Foot Journey,...,The Martian,Micmacs,Solomon and Sheba,In the Company of Men,Silent House,Big Fish,Get Real,Trading Places,DOA: Dead or Alive,Hey Arnold! The Movie
0,0,Vincent,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1,Edgar,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2,Addilyn,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,3,Marlee,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,4,Javier,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [8]:
R = user_reviews.iloc[:, 2:].copy().to_numpy()
n_users, n_movies = R.shape

In [9]:
from numpy.linalg import solve
n_factors = 10
U = np.random.rand(n_users, n_factors)
M = np.random.rand(n_movies, n_factors)
G = movie_genres.iloc[:, 2:].copy().to_numpy()


In [10]:
import numpy as np

def create_train_test_masks(ratings, split_ratio=0.8, seed=42):
    """
    Creates training and test masks by splitting nonzero ratings in the matrix.

    Parameters:
    - ratings: np.array (User-Movie Rating Matrix)
    - split_ratio: float (default 0.8) - fraction of nonzero entries to retain in train mask
    - seed: int (random seed for reproducibility)

    Returns:
    - train_matrix: np.array with 80% of ratings, 0 elsewhere
    - test_matrix: np.array with 20% of ratings, 0 elsewhere
    """
    np.random.seed(seed)

    # Find indices of all nonzero ratings
    nonzero_indices = np.argwhere(ratings > 0)  # Get (row, col) pairs

    # Shuffle indices
    np.random.shuffle(nonzero_indices)

    # Split indices into training (80%) and test (20%)
    split_idx = int(len(nonzero_indices) * split_ratio)
    train_indices = nonzero_indices[:split_idx]
    test_indices = nonzero_indices[split_idx:]

    # Create copies of the original matrix
    train_matrix = np.zeros_like(ratings)  # Empty training matrix
    test_matrix = np.zeros_like(ratings)   # Empty test matrix

    # Fill training matrix
    for row, col in train_indices:
        train_matrix[row, col] = ratings[row, col]

    # Fill test matrix
    for row, col in test_indices:
        test_matrix[row, col] = ratings[row, col]

    return train_matrix, test_matrix



In [22]:
import numpy as np
from numpy.linalg import solve
latent_dim=50
reg_param=0.1
max_iters=500
R_train, R_val = create_train_test_masks(R, 1.0)
U = np.random.rand(n_users, latent_dim)
M = np.random.rand(n_movies, latent_dim)

best_val_score = 100
counter = 0
for iteration in range(max_iters):
    # Solve for U while keeping M fixed
    for u in range(n_users):
        relevant_movies = R_train[u, :].nonzero()[0]  # Movies rated by user u
        if len(relevant_movies) > 0:
            M_subset = M[relevant_movies]  # Extract only relevant movie embeddings
            R_u = R_train[u, relevant_movies]  # Get known ratings

            # Solve least squares problem: (M.T @ M + λI)U = M.T @ R
            U[u] = solve(M_subset.T @ M_subset + reg_param * np.eye(latent_dim), M_subset.T @ R_u)

    # Solve for M while keeping U fixed
    for m in range(n_movies):
        relevant_users = R_train[:, m].nonzero()[0]  # Users who rated movie m
        if len(relevant_users) > 0:
            U_subset = U[relevant_users]  # Extract relevant user embeddings
            R_m = R_train[relevant_users, m] # Get known ratings

            # Solve least squares problem: (U.T @ U + λI)M = U.T @ R
            M[m] = solve(U_subset.T @ U_subset + reg_param * np.eye(latent_dim), U_subset.T @ R_m)

    if iteration % 5 == 0:
        # Compute RMSE only on known ratings
        predicted_ratings = U @ M.T
        train_mask = R_train > 0  # Only compare known ratings
        val_mask = R_val > 0
        train_error = np.sqrt(np.sum((R - predicted_ratings) ** 2 * train_mask) / np.sum(train_mask))
        val_error = np.sqrt(np.sum((R - predicted_ratings) ** 2 * val_mask) / np.sum(val_mask))
        print(f"Iteration {iteration}, Train RMSE: {train_error:.4f}, Val RMSE: {val_error:.4f}")
        if val_error < best_val_score:
            best_val_score = val_error
            counter = 0
        else:
            counter += 1
    
    # if counter == 5:
    #     break
        

  val_error = np.sqrt(np.sum((R - predicted_ratings) ** 2 * val_mask) / np.sum(val_mask))


Iteration 0, Train RMSE: 0.0212, Val RMSE: nan
Iteration 5, Train RMSE: 0.0180, Val RMSE: nan
Iteration 10, Train RMSE: 0.0176, Val RMSE: nan
Iteration 15, Train RMSE: 0.0173, Val RMSE: nan
Iteration 20, Train RMSE: 0.0170, Val RMSE: nan
Iteration 25, Train RMSE: 0.0169, Val RMSE: nan
Iteration 30, Train RMSE: 0.0168, Val RMSE: nan
Iteration 35, Train RMSE: 0.0167, Val RMSE: nan
Iteration 40, Train RMSE: 0.0166, Val RMSE: nan
Iteration 45, Train RMSE: 0.0165, Val RMSE: nan
Iteration 50, Train RMSE: 0.0164, Val RMSE: nan
Iteration 55, Train RMSE: 0.0163, Val RMSE: nan
Iteration 60, Train RMSE: 0.0163, Val RMSE: nan
Iteration 65, Train RMSE: 0.0162, Val RMSE: nan
Iteration 70, Train RMSE: 0.0161, Val RMSE: nan
Iteration 75, Train RMSE: 0.0161, Val RMSE: nan
Iteration 80, Train RMSE: 0.0160, Val RMSE: nan
Iteration 85, Train RMSE: 0.0160, Val RMSE: nan
Iteration 90, Train RMSE: 0.0159, Val RMSE: nan
Iteration 95, Train RMSE: 0.0158, Val RMSE: nan
Iteration 100, Train RMSE: 0.0158, Val RMS

current best: 50 features, 1.3535, labda=0.1

In [23]:
U

array([[-0.35157354,  0.96872769,  0.68118536, ...,  0.07196389,
         0.49919973,  0.1812895 ],
       [-0.12040597,  1.03814058,  0.38384829, ...,  0.14017371,
         0.25682052,  0.56325141],
       [ 0.43935076,  0.0209128 ,  0.29138834, ...,  0.58863027,
         0.20231106,  0.05268557],
       ...,
       [ 0.08616588,  0.35064803,  1.17257199, ...,  0.31417921,
        -0.03987936,  0.38510294],
       [-0.37632487,  0.25255939,  0.37217677, ...,  0.19873746,
        -0.48426179, -0.03643288],
       [ 0.49042087,  0.46506527,  0.17066421, ..., -0.45465839,
         0.41474207,  0.01669478]])

In [24]:
M

array([[ 0.04907281,  0.26349084,  0.34536202, ...,  0.46370865,
         0.40017635,  0.33417578],
       [ 0.41048656,  0.09080391,  0.3598789 , ..., -0.19545789,
         0.22028167,  0.27152782],
       [ 0.29315543,  0.42392537,  0.0029501 , ...,  0.36266992,
         0.48863039, -0.10714937],
       ...,
       [ 0.12698681, -0.06537703,  0.05944954, ...,  0.04100839,
         0.26636899, -0.02416342],
       [ 0.151401  , -0.04054674,  0.54197175, ...,  0.33457767,
         0.36977374,  0.52509191],
       [ 0.11261698,  0.19042551,  0.07069347, ..., -0.14415474,
        -0.21046448,  0.11838948]])

In [70]:
predicted_ratings = U @ M.T

In [71]:
for element in predicted_ratings[0]:
    print(element)

4.831324718646622
2.904540716905392
3.4158674236131605
3.1560583881910724
4.167389132599537
2.662813101035745
2.260073072662582
3.9443244899464887
2.6270347163322585
3.440178903777096
3.4154119780139283
1.818096199680186
2.718822668068285
3.8698636497950676
3.871935049815557
2.8863778454469062
3.8609154661953675
2.7571970818366145
3.691798103092633
3.9776529816084767
3.5865676379687907
2.388189201492732
3.655209996676699
3.6967903561870523
3.662585455066222
3.3670506026168816
3.195799952462805
3.4431632561597194
2.875528377223158
2.1548239361949793
4.390184528932116
4.0337792145183755
3.395807703233415
2.9901165713636817
3.9608206753921062
3.5240472647912657
4.203804861914778
1.855063616204921
3.428387794977554
2.725601944908743
3.152528230524911
3.3228734766710413
3.1483060512614083
2.5148507139408443
3.827289989239146
3.100799767641659
4.124014720110138
4.120471779519902
2.8372973528158725
3.215882587763857
4.034026601582142
3.1846671784806633
2.003091284814646
3.178088120930364
2.73

In [133]:
import numpy as np

# Given list of user indices
user_indices = np.array([100, 103, 109])

# Number of elements per user
num_elements = 10

# Expand each index by adding 0 to (num_elements - 1)
expanded_indices = user_indices[:, None] * num_elements + np.arange(num_elements)

# Flatten into a 1D array
expanded_indices = expanded_indices.flatten()

print(expanded_indices)


[1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1030 1031 1032 1033
 1034 1035 1036 1037 1038 1039 1090 1091 1092 1093 1094 1095 1096 1097
 1098 1099]


In [134]:
movie_indices = np.arange(n_movies)[:, None] + np.arange(n_users) * n_movies  # Shape: (n_movies, n_users)

In [135]:
movie_indices

array([[      0,    2000,    4000, ..., 1194000, 1196000, 1198000],
       [      1,    2001,    4001, ..., 1194001, 1196001, 1198001],
       [      2,    2002,    4002, ..., 1194002, 1196002, 1198002],
       ...,
       [   1997,    3997,    5997, ..., 1195997, 1197997, 1199997],
       [   1998,    3998,    5998, ..., 1195998, 1197998, 1199998],
       [   1999,    3999,    5999, ..., 1195999, 1197999, 1199999]])

In [137]:
U.shape

(1200000, 50)

In [154]:
print(n_movies)

5


In [103]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

S = cosine_similarity(G)

In [160]:
import numpy as np
from numpy.linalg import solve
latent_dim=10
reg_param=0.1
max_iters=1000
R_train, R_val = create_train_test_masks(R)
n_users, n_movies = R.shape
U = np.random.rand(n_users * n_movies, latent_dim)
M = np.random.rand(n_movies, latent_dim) 


for iteration in range(max_iters):
    best_val_score = 100
    counter = 0
    # Solve for U while keeping M fixed
    for u in range(n_users):
        user = u % n_movies
        relevant_movies = R_train[user, :].nonzero()[0]  # Movies rated by user u
        if len(relevant_movies) > 0:
            M_subset = M[relevant_movies]  # Extract only relevant movie embeddings
            R_u = R_train[user, relevant_movies]  # Get known ratings

            # Solve least squares problem: (M.T @ M + λI)U = M.T @ R
            U[user] = solve(M_subset.T @ M_subset + reg_param * np.eye(latent_dim), M_subset.T @ R_u)

    # Solve for M while keeping U fixed
    for m in range(n_movies):
        relevant_users = R_train[:, m].nonzero()[0]  # Users who rated movie m
        if len(relevant_users) > 0:
            expanded_users = relevant_users[:, None] * n_movies + np.arange(n_movies)
            expanded_users = expanded_users.flatten()
            U_subset = U[expanded_users]  # Extract relevant user embeddings
            R_m = R_train[relevant_users, m] # Get known ratings
            repeated_R_m = np.repeat(R_m, n_movies)
            # Solve least squares problem: (U.T @ U + λI)M = U.T @ R
            M[m] = solve(U_subset.T @ U_subset + reg_param * np.eye(latent_dim), U_subset.T @ repeated_R_m)

    if iteration % 1 == 0:
        # Compute RMSE only on known ratings
        predicted_ratings = U @ M.T
        refined_ratings = np.zeros((n_users, n_movies))
        for m in range(n_movies):
            for u in range(n_users):
                refined_ratings[u, m] = predicted_ratings[u*n_movies+m, m]
        
        train_mask = R_train > 0  # Only compare known ratings
        val_mask = R_val > 0
        train_error = np.sqrt(np.sum((R - refined_ratings) ** 2 * train_mask) / np.sum(train_mask))
        val_error = np.sqrt(np.sum((R - refined_ratings) ** 2 * val_mask) / np.sum(val_mask))
        print(f"Iteration {iteration}, Train RMSE: {train_error:.4f}, Val RMSE: {val_error:.4f}")
        if val_error < best_val_score:
            best_val_score = val_error
            counter = 0
        else:
            counter += 1
    
    if counter == 5:
        break

: 

In [114]:
m = 10
movie_rows = np.arange(U.shape[0])[np.arange(U.shape[0]) % n_movies == m]
U = np.random.rand(n_users * n_movies, latent_dim)
U_filtered = U[movie_rows]  # Get all user-movie specific latent vectors for movie m

In [122]:
indices = np.array([m + i*n_movies for i in range(n_users)])

In [121]:
len(indices)

600

In [123]:
U[indices].shape

(600, 50)

In [None]:
n_

In [None]:
import numpy as np
from numpy.linalg import solve
latent_dim=50
reg_param=0.1
max_iters=1000
R_train, R_val = create_train_test_masks(R)
U = np.random.rand(n_users * n_movies, latent_dim)
M = np.random.rand(n_users * n_movies, latent_dim)

for iteration in range(max_iters):
    best_val_score = 100
    counter = 0
    # Solve for U while keeping M fixed
    for u in range(n_users):
        relevant_movies = R_train[u, :].nonzero()[0]  # Movies rated by user u
        if len(relevant_movies) > 0:
            M_subset = M[relevant_movies]  # Extract only relevant movie embeddings
            R_u = R_train[u, relevant_movies]  # Get known ratings

            # Solve least squares problem: (M.T @ M + λI)U = M.T @ R
            U[u] = solve(M_subset.T @ M_subset + reg_param * np.eye(latent_dim), M_subset.T @ R_u)

    # Solve for M while keeping U fixed
    for m in range(n_movies):
        relevant_users = R_train[:, m].nonzero()[0]  # Users who rated movie m
        if len(relevant_users) > 0:
            U_subset = U[relevant_users]  # Extract relevant user embeddings
            R_m = R_train[relevant_users, m] # Get known ratings

            # Solve least squares problem: (U.T @ U + λI)M = U.T @ R
            M[m] = solve(U_subset.T @ U_subset + reg_param * np.eye(latent_dim), U_subset.T @ R_m)

    if iteration % 5 == 0:
        # Compute RMSE only on known ratings
        predicted_ratings = U @ M.T
        train_mask = R_train > 0  # Only compare known ratings
        val_mask = R_val > 0
        train_error = np.sqrt(np.sum((R - predicted_ratings) ** 2 * train_mask) / np.sum(train_mask))
        val_error = np.sqrt(np.sum((R - predicted_ratings) ** 2 * val_mask) / np.sum(val_mask))
        print(f"Iteration {iteration}, Train RMSE: {train_error:.4f}, Val RMSE: {val_error:.4f}")
        if val_error < best_val_score:
            best_val_score = val_error
            counter = 0
        else:
            counter += 1
    
    if counter == 5:
        break

In [1]:
pip install tensorflow

Collecting tensorflow
  Downloading tensorflow-2.13.1-cp38-cp38-macosx_12_0_arm64.whl.metadata (2.6 kB)
INFO: pip is looking at multiple versions of tensorflow to determine which version is compatible with other requirements. This could take a while.
  Downloading tensorflow-2.13.0-cp38-cp38-macosx_12_0_arm64.whl.metadata (2.6 kB)
Collecting tensorflow-macos==2.13.0 (from tensorflow)
  Downloading tensorflow_macos-2.13.0-cp38-cp38-macosx_12_0_arm64.whl.metadata (3.2 kB)
Collecting absl-py>=1.0.0 (from tensorflow-macos==2.13.0->tensorflow)
  Using cached absl_py-2.1.0-py3-none-any.whl.metadata (2.3 kB)
Collecting astunparse>=1.6.0 (from tensorflow-macos==2.13.0->tensorflow)
  Downloading astunparse-1.6.3-py2.py3-none-any.whl.metadata (4.4 kB)
Collecting flatbuffers>=23.1.21 (from tensorflow-macos==2.13.0->tensorflow)
  Downloading flatbuffers-25.1.24-py2.py3-none-any.whl.metadata (875 bytes)
Collecting gast<=0.4.0,>=0.2.1 (from tensorflow-macos==2.13.0->tensorflow)
  Downloading gast-0.

In [56]:
import tensorflow as tf
from tensorflow.keras.layers import Embedding, Flatten, Dense, Concatenate, Input
from tensorflow.keras.models import Model

def build_ncf_model(n_users, n_movies, latent_dim=10):
    """
    Builds a Neural Collaborative Filtering (NCF) model using TensorFlow.
    
    Parameters:
    - n_users: int, number of users
    - n_movies: int, number of movies
    - latent_dim: int, size of embedding space
    
    Returns:
    - Compiled NCF model
    """
    # Input layers
    user_input = Input(shape=(1,), name="user_input")
    movie_input = Input(shape=(1,), name="movie_input")

    # Embedding layers
    user_embedding = Embedding(n_users, latent_dim, name="user_embedding")(user_input)
    movie_embedding = Embedding(n_movies, latent_dim, name="movie_embedding")(movie_input)

    # Flatten embeddings
    user_vector = Flatten()(user_embedding)
    movie_vector = Flatten()(movie_embedding)

    # GMF (Generalized Matrix Factorization) Component
    gmf_output = tf.keras.layers.multiply([user_vector, movie_vector])  

    # MLP (Neural Network) Component
    mlp_input = Concatenate()([user_vector, movie_vector])
    mlp_hidden = Dense(64, activation='relu')(mlp_input)
    mlp_hidden = Dense(32, activation='relu')(mlp_hidden)
    mlp_hidden = Dense(16, activation='relu')(mlp_hidden)

    # Concatenate GMF and MLP outputs
    neuMF_output = Concatenate()([gmf_output, mlp_hidden])

    # Final prediction layer
    output = Dense(1, activation='sigmoid', name="output_layer")(neuMF_output)

    # Build model
    model = Model(inputs=[user_input, movie_input], outputs=output)
    model.compile(optimizer='adam', loss='mse', metrics=['mae'])

    return model


# Assume we have 1000 users and 5000 movies
n_users = 600
n_movies = 2000
latent_dim = 50  # Number of latent factors

# Build the model
ncf_model = build_ncf_model(n_users, n_movies, latent_dim)
ncf_model.summary()


Model: "model_8"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 user_input (InputLayer)     [(None, 1)]                  0         []                            
                                                                                                  
 movie_input (InputLayer)    [(None, 1)]                  0         []                            
                                                                                                  
 user_embedding (Embedding)  (None, 1, 50)                30000     ['user_input[0][0]']          
                                                                                                  
 movie_embedding (Embedding  (None, 1, 50)                100000    ['movie_input[0][0]']         
 )                                                                                          

In [39]:
R_train, R_val = create_train_test_masks(R, seed=42)

In [40]:
import numpy as np

train_users, train_movies = np.where(R_train > 0)  # (users, movies)
val_users, val_movies = np.where(R_val > 0)  # (users, movies)
train_ratings = R[train_users, train_movies]/5 # Normalized ratings between 0 and 1
val_ratings = R[val_users, val_movies]/5

In [43]:
len(val_ratings)

3305

In [44]:
# Train the model
ncf_model.fit([train_users, train_movies], train_ratings, epochs=100, batch_size=256)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.src.callbacks.History at 0x31508f460>

In [58]:
n_users = 600
n_movies = 2000
latent_dim = 50  # Number of latent factors

# Build the model
ncf_model = build_ncf_model(n_users, n_movies, latent_dim)
#ncf_model.summary()

history = ncf_model.fit(
    [train_users, train_movies], train_ratings,
    validation_data=([val_users, val_movies], val_ratings),  # Monitor validation loss
    epochs=1000, batch_size=256
)

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E

In [59]:
import numpy as np

# Get number of users and movies from the rating matrix
n_users, n_movies = R.shape

# Create all possible user-movie pairs
user_grid, movie_grid = np.meshgrid(np.arange(n_users), np.arange(n_movies), indexing='ij')

# Flatten to pass into model
user_pairs = user_grid.flatten()
movie_pairs = movie_grid.flatten()

# Predict ratings for all user-movie pairs
predicted_ratings = ncf_model.predict([user_pairs, movie_pairs])

# Reshape predictions into (n_users, n_movies)
predicted_rating_matrix = predicted_ratings.reshape(n_users, n_movies)

print(predicted_rating_matrix.shape)  # Should be (n_users, n_movies)


(600, 2000)


In [63]:
pred_matrix = predicted_rating_matrix * 5

In [72]:
for i, j in zip(pred_matrix[0], predicted_ratings[0]):
    print(i, j)

3.4724417 4.831324718646622
2.3212247 2.904540716905392
2.295945 3.4158674236131605
3.6386015 3.1560583881910724
2.6134765 4.167389132599537
3.0010593 2.662813101035745
2.9996622 2.260073072662582
3.0032585 3.9443244899464887
3.0018818 2.6270347163322585
4.5579033 3.440178903777096
3.017016 3.4154119780139283
2.9996312 1.818096199680186
2.5862813 2.718822668068285
2.9307485 3.8698636497950676
3.0013087 3.871935049815557
4.9994783 2.8863778454469062
5.0 3.8609154661953675
3.0034776 2.7571970818366145
2.005492 3.691798103092633
3.0007 3.9776529816084767
3.063047 3.5865676379687907
3.0008183 2.388189201492732
2.998361 3.655209996676699
3.9004347 3.6967903561870523
3.5635152 3.662585455066222
3.0042305 3.3670506026168816
3.0026522 3.195799952462805
2.999269 3.4431632561597194
2.6128078 2.875528377223158
2.0084858 2.1548239361949793
4.9727917 4.390184528932116
2.9988492 4.0337792145183755
3.9971635 3.395807703233415
4.9984493 2.9901165713636817
3.096678 3.9608206753921062
5.0 3.524047264791

In [93]:
import numpy as np

def masked_rmse(matrix1, matrix2):
    """
    Compute RMSE between two matrices, considering only nonzero entries.
    
    Parameters:
    - matrix1: np.array (first matrix)
    - matrix2: np.array (second matrix)
    
    Returns:
    - RMSE (float)
    """
    # Create a mask of nonzero entries
    mask = matrix1 != 0  # Boolean mask where matrix1 has nonzero values
    
    # Compute the squared error only for nonzero entries
    squared_errors = (matrix1[mask] - matrix2[mask]) ** 2
    
    # Compute RMSE
    rmse = np.sqrt(np.mean(squared_errors))
    return rmse

masked_rmse(R, predicted_ratings)


0.01486034398759105

In [77]:
R.nonzero()

(array([  0,   0,   0, ..., 599, 599, 599]),
 array([ 127,  141,  151, ..., 1833, 1931, 1969]))

In [91]:
R[599, 1969]

3.0

In [90]:
predicted_ratings[599, 1969]

2.9866130177109773