In [13]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from numpy import loadtxt
import pandas as pd


In [14]:
df = pd.read_csv("ml-latest-small/ratings.csv")
df_small = df.head(900)


In [15]:

matrix = df.pivot(index='userId', columns='movieId', values='rating')

matrix_array = matrix.to_numpy()

matrix_array[matrix_array == 0] = np.nan



In [16]:

# Replace NaN values with 0
matrix_array_cleaned = np.nan_to_num(matrix_array, nan=0.0)

# Compute the SVD
U, Sigma, VT = np.linalg.svd(matrix_array_cleaned, full_matrices=False)

# Print the shapes of U, Sigma, and VT
print("Shape of U:", U.shape)
print("Shape of Sigma:", Sigma.shape)
print("Shape of VT:", VT.shape)

# Print the singular values
print("Singular values:", Sigma)

# Optional: Reconstruct the original matrix using SVD components
reconstructed_matrix = U @ np.diag(Sigma) @ VT
print("Reconstructed matrix:", reconstructed_matrix)

Shape of U: (610, 610)
Shape of Sigma: (610,)
Shape of VT: (610, 9724)
Singular values: [534.41989777 231.23661142 191.1508762  170.42250831 154.552948
 147.33575651 135.65556768 122.66302989 121.44217651 113.11144323
 109.60313933 107.93266172 105.97376877 102.05675293  99.87323589
  99.28999246  97.11713355  93.40879296  92.32408574  90.97607986
  90.42515264  88.83466993  87.29627026  86.05702164  85.15393734
  83.04476272  82.40743887  81.73690785  80.86997674  79.52408732
  79.16948319  78.84651534  78.00723454  76.71225804  75.6272454
  75.40667214  74.96494138  74.29201322  74.05266585  73.45188037
  73.2246949   72.46953282  71.70985332  70.20660519  70.02143448
  69.93495369  69.50676339  69.07855191  68.43455046  67.8676482
  67.49922664  67.48404012  67.06467382  66.58800288  66.0811711
  65.46149276  64.93460662  64.84974658  64.3412403   63.92922187
  63.80270853  63.6549702   63.21158802  63.02672434  62.8347854
  62.30489869  61.71605686  61.54464024  61.24327624  60.805

In [17]:
print(matrix_array_cleaned.shape)

(610, 9724)


In [18]:
# Assume U, Sigma, and VT are obtained from SVD
# Let's generate predictions for all user-item pairs

def generate_predictions(U, Sigma, VT, num_recommendations=5):
    predicted_ratings = np.dot(U, np.dot(np.diag(Sigma), VT))
    return predicted_ratings

predicted_ratings = generate_predictions(U, Sigma, VT)

# Now, let's make recommendations for a specific user (user_id)
def recommend_items(user_id, predicted_ratings, num_recommendations=5):
    user_ratings = predicted_ratings[user_id - 1]  # User ID is 1-indexed
    sorted_indices = np.argsort(user_ratings)[::-1]  # Sort in descending order
    recommended_items = []
    for i in sorted_indices:
        if len(recommended_items) == num_recommendations:
            break
        if np.isnan(user_ratings[i]):  # Skip items with NaN ratings
            continue
        recommended_items.append(i + 1)  # Item ID is 1-indexed
    return recommended_items

user_id = 1  # Example user ID
recommended_items = recommend_items(user_id, predicted_ratings)
print(f"Top recommended items for user {user_id}: {recommended_items}")


Top recommended items for user 1: [462, 399, 521, 898, 816]


In [19]:
def get_predicted_and_actual_rating(user_id, item_id, predicted_ratings, original_ratings):
    predicted_rating = predicted_ratings[user_id - 1, item_id - 1]  # User and item IDs are 1-indexed
    actual_rating = original_ratings[user_id - 1, item_id - 1]  # User and item IDs are 1-indexed
    return predicted_rating, actual_rating


In [20]:
user_id = 1  # Example user ID
item_id = 1  # Example item ID
predicted_rating, actual_rating = get_predicted_and_actual_rating(user_id, item_id, predicted_ratings, matrix_array_cleaned)
print(f"Predicted rating for user {user_id} and item {item_id}: {predicted_rating}")
print(f"Actual rating for user {user_id} and item {item_id}: {actual_rating}")



Predicted rating for user 1 and item 1: 4.000000000000031
Actual rating for user 1 and item 1: 4.0


In [21]:

def get_predicted_and_actual_rating(user_id, item_id, predicted_ratings, original_ratings):
    predicted_rating = predicted_ratings[user_id - 1, item_id - 1]  # User and item IDs are 1-indexed
    actual_rating = original_ratings[user_id - 1, item_id - 1]  # User and item IDs are 1-indexed
    return predicted_rating, actual_rating

# Create empty lists to store data
user_ids = []
item_ids = []
predicted_ratings_list = []
actual_ratings_list = []
num_users = 7 
num_movies = 725
# Iterate over all user-item pairs
for user_id in range(1, num_users + 1):  # Assuming user IDs are 1-indexed
    for item_id in range(1, num_movies + 1):  # Assuming movie IDs are 1-indexed
        predicted_rating, actual_rating = get_predicted_and_actual_rating(user_id, item_id, predicted_ratings, matrix_array_cleaned)
        user_ids.append(user_id)
        item_ids.append(item_id)
        predicted_ratings_list.append(predicted_rating)
        actual_ratings_list.append(actual_rating)

# Create DataFrame
ratings_df = pd.DataFrame({
    'User_ID': user_ids,
    'Item_ID': item_ids,
    'Predicted_Rating': predicted_ratings_list,
    'Actual_Rating': actual_ratings_list
})

# Print the DataFrame
print(ratings_df.head(100))


    User_ID  Item_ID  Predicted_Rating  Actual_Rating
0         1        1      4.000000e+00            4.0
1         1        2      1.370432e-14            0.0
2         1        3      4.000000e+00            4.0
3         1        4      1.469311e-15            0.0
4         1        5      1.194184e-14            0.0
..      ...      ...               ...            ...
95        1       96     -1.824062e-15            0.0
96        1       97      7.953707e-16            0.0
97        1       98      4.000000e+00            4.0
98        1       99      1.502271e-14            0.0
99        1      100      6.290107e-15            0.0

[100 rows x 4 columns]


In [22]:
from sklearn.metrics import mean_squared_error

# Remove rows with NaN actual ratings
ratings_df_cleaned = ratings_df.dropna(subset=['Actual_Rating'])

# Calculate RMSE
rmse = np.sqrt(mean_squared_error(ratings_df_cleaned['Actual_Rating'], ratings_df_cleaned['Predicted_Rating']))

print("RMSE:", rmse)


RMSE: 5.187287966568021e-15


In [23]:
print(ratings_df[30:60])


    User_ID  Item_ID  Predicted_Rating  Actual_Rating
30        1       31      4.749673e-15            0.0
31        1       32      2.443878e-14            0.0
32        1       33      3.242719e-14            0.0
33        1       34      1.804112e-14            0.0
34        1       35      2.997819e-16            0.0
35        1       36      1.565414e-14            0.0
36        1       37     -3.469447e-16            0.0
37        1       38      2.404327e-15            0.0
38        1       39      1.592476e-15            0.0
39        1       40      3.115563e-15            0.0
40        1       41      9.636389e-16            0.0
41        1       42      1.536271e-14            0.0
42        1       43      7.855126e-15            0.0
43        1       44      5.000000e+00            5.0
44        1       45      3.205769e-15            0.0
45        1       46      1.691355e-17            0.0
46        1       47      5.000000e+00            5.0
47        1       48      1.