In [1]:
import numpy as np
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors
from sklearn.metrics import mean_absolute_error, mean_squared_error

In [7]:


# Reproducible 5x5 grid of raw integer scores in the range 1..100
np.random.seed(12)
m = np.random.randint(low=1, high=101, size=(5, 5))

# Explicit feedback: bucket the raw scores into 1-5 ratings (bins of width 20)
m_ratings = np.ceil(m / 20).astype(int)

# Unary feedback: 1 where the raw score exceeds 50, otherwise 0
m_unary = (m > 50).astype(int)

# Hide entries at random: each cell is kept with probability 0.6 and
# dropped with probability 0.4. The generator is re-seeded so the mask
# does not depend on how many draws were made above.
np.random.seed(42)
mask = np.random.choice([1, 0], size=(5, 5), p=[0.6, 0.4])

# Multiplying by the 1/0 mask zeroes the dropped cells. In the unary matrix
# a 0 therefore means either "dropped" or "no interaction".
m_ratings_sparse, m_unary_sparse = m_ratings * mask, m_unary * mask

print("Sparse rating matrix (explicit, 1–5, 0 = missing):", m_ratings_sparse, sep="\n")
print("\nSparse unary matrix (implicit, 1/0, 0 = missing/no interaction):", m_unary_sparse, sep="\n")


Sparse rating matrix (explicit, 1–5, 0 = missing):
[[4 0 0 1 1]
 [4 4 0 0 0]
 [3 0 0 5 2]
 [4 4 1 4 1]
 [0 5 2 4 2]]

Sparse unary matrix (implicit, 1/0, 0 = missing/no interaction):
[[1 0 0 0 0]
 [1 1 0 0 0]
 [1 0 0 1 0]
 [1 1 0 1 0]
 [0 1 0 1 0]]


In [8]:
# Function to compute co-rated cosine similarity
def co_rated_cosine_similarity(matrix):
    """Cosine similarity between every pair of rows over their shared non-zero columns.

    For each pair of rows, only the columns where BOTH rows are non-zero
    are used; the cosine of the two restricted vectors becomes the
    similarity. Pairs with no shared non-zero column get similarity 0,
    which also applies to the diagonal entry of an all-zero row.

    Parameters
    ----------
    matrix : numpy.ndarray
        2-D array with one row per user; 0 is treated as "not rated".

    Returns
    -------
    numpy.ndarray
        Symmetric float array of shape (n_rows, n_rows).
    """
    user_count = matrix.shape[0]
    similarities = np.zeros((user_count, user_count))
    has_rating = matrix != 0  # True wherever a row holds a non-zero entry

    for a in range(user_count):
        # Only the upper triangle (including the diagonal) is computed;
        # each value is mirrored into the lower triangle.
        for b in range(a, user_count):
            shared = has_rating[a] & has_rating[b]
            if not shared.any():
                continue  # no co-rated column: entry stays 0

            vec_a = matrix[a, shared]
            vec_b = matrix[b, shared]
            length_a = np.sqrt(np.sum(vec_a ** 2))
            length_b = np.sqrt(np.sum(vec_b ** 2))
            if not (length_a > 0 and length_b > 0):
                continue  # degenerate norm: entry stays 0

            cosine = np.dot(vec_a, vec_b) / (length_a * length_b)
            similarities[a, b] = cosine
            similarities[b, a] = cosine
    return similarities

In [9]:
# User-user similarity on each sparse matrix, restricted to co-rated items
sim_ratings = co_rated_cosine_similarity(m_ratings_sparse)
sim_unary = co_rated_cosine_similarity(m_unary_sparse)

# Distance = 1 - similarity, clamped into [0, 1] so that floating-point
# round-off in the similarity cannot produce a slightly negative distance
dist_ratings = np.clip(1 - sim_ratings, 0, 1)
dist_unary = np.clip(1 - sim_unary, 0, 1)

# Brute-force neighbour models fitted on the precomputed distance matrices.
# NOTE(review): predict_values ranks neighbours from the similarity matrices
# directly, so these models look unused in this part of the notebook — confirm.
model_knn_ratings = NearestNeighbors(metric='precomputed', algorithm='brute')
model_knn_unary = NearestNeighbors(metric='precomputed', algorithm='brute')
model_knn_ratings.fit(dist_ratings)
model_knn_unary.fit(dist_unary)

In [10]:
# Predict missing values
def predict_values(matrix, similarity, k=2):
    """Predict every cell as a similarity-weighted mean over the k nearest users.

    For each user the k most similar OTHER users are selected. The
    prediction for an item is the weighted average of those neighbours'
    non-zero values for that item, weighted by similarity. A cell is left
    at 0 ("no prediction") when no selected neighbour has a non-zero value
    for the item, or when the summed weight is not positive.

    Parameters
    ----------
    matrix : array-like, shape (n_users, n_items)
        Observed values; 0 is treated as "missing".
    similarity : array-like, shape (n_users, n_users)
        User-user similarity; row ``u`` holds the similarities of user ``u``.
    k : int, default 2
        Neighbourhood size. Values larger than ``n_users - 1`` are capped,
        and ``k=0`` yields no predictions.

    Returns
    -------
    numpy.ndarray
        Float array with the same shape as ``matrix``.

    Raises
    ------
    ValueError
        If ``k`` is negative.
    """
    if k < 0:
        raise ValueError("k must be non-negative")

    matrix = np.asarray(matrix)
    similarity = np.asarray(similarity, dtype=float)
    n_users, n_items = matrix.shape
    predictions = np.zeros((n_users, n_items), dtype=float)

    for u in range(n_users):
        sim_scores = similarity[u].copy()
        sim_scores[u] = -1  # sentinel so self sorts to the front of the ranking

        # Rank ascending, then drop self explicitly. A plain [-k:] slice
        # selects every user when k == 0 and lets the -1 sentinel leak in
        # as a weight when k >= n_users.
        ranked = np.argsort(sim_scores)
        ranked = ranked[ranked != u]
        n_neighbors = min(k, ranked.size)
        top_k_users = ranked[ranked.size - n_neighbors:]
        if top_k_users.size == 0:
            continue

        weights = sim_scores[top_k_users]
        neighbor_rows = matrix[top_k_users].astype(float)
        has_value = (neighbor_rows != 0).astype(float)

        # Per item: summed weight of neighbours holding a value, and the
        # weighted sum of those values (zero cells contribute nothing).
        sim_sum = weights @ has_value
        weighted_sum = weights @ neighbor_rows

        # Divide only where the summed weight is positive; other cells stay 0.
        np.divide(weighted_sum, sim_sum, out=predictions[u], where=sim_sum > 0)
    return predictions

# Fill in both matrices from each user's two most similar neighbours
pred_ratings = predict_values(m_ratings_sparse, sim_ratings, k=2)
pred_unary = predict_values(m_unary_sparse, sim_unary, k=2)

# Show the predictions rounded to two decimals; 0 marks "no prediction"
print("\nPredicted ratings (explicit, 0 = no prediction):", np.round(pred_ratings, 2), sep="\n")
print("\nPredicted interactions (unary, 0 = no prediction):", np.round(pred_unary, 2), sep="\n")


Predicted ratings (explicit, 0 = no prediction):
[[4.   4.49 2.   4.   2.  ]
 [3.   5.   2.   4.5  2.  ]
 [4.   4.5  2.   4.   2.  ]
 [4.   4.49 2.   4.   2.  ]
 [3.5  4.   0.   5.   2.  ]]

Predicted interactions (unary, 0 = no prediction):
[[1. 1. 0. 1. 0.]
 [1. 1. 0. 1. 0.]
 [1. 1. 0. 1. 0.]
 [1. 1. 0. 1. 0.]
 [1. 1. 0. 1. 0.]]


In [11]:

# Score the predictions with MAE and RMSE on the cells the mask kept,
# comparing against the original (pre-masking) m_ratings and m_unary.
known = mask == 1
actual_ratings = m_ratings[known]
predicted_ratings = pred_ratings[known]
actual_unary = m_unary[known]
predicted_unary = pred_unary[known]

# A prediction of exactly 0 is read as "no prediction", so those cells are
# left out of the metrics.
scored_ratings = predicted_ratings != 0
scored_unary = predicted_unary != 0

mae_ratings = mean_absolute_error(actual_ratings[scored_ratings], predicted_ratings[scored_ratings])
rmse_ratings = np.sqrt(mean_squared_error(actual_ratings[scored_ratings], predicted_ratings[scored_ratings]))
mae_unary = mean_absolute_error(actual_unary[scored_unary], predicted_unary[scored_unary])
rmse_unary = np.sqrt(mean_squared_error(actual_unary[scored_unary], predicted_unary[scored_unary]))

print("\nEvaluation for explicit ratings (known values):")
print(f"MAE: {mae_ratings:.2f}")
print(f"RMSE: {rmse_ratings:.2f}")

print("\nEvaluation for unary data (known values):")
print(f"MAE: {mae_unary:.2f}")
print(f"RMSE: {rmse_unary:.2f}")


Evaluation for explicit ratings (known values):
MAE: 0.78
RMSE: 1.07

Evaluation for unary data (known values):
MAE: 0.09
RMSE: 0.30
