#Evaluation of Recommendandation system for a dummy data

Evaluation parameters:

1.  Mean square error
2. Mean Absolute Error
3. Precision@k
4. Recall@k
5. Mean Average Precision
6. Mean Reciprocal Rank

In [1]:
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Generate dummy data
np.random.seed(0)

# Number of users and items
num_users = 10
num_items = 5

# Generate a random rating matrix (user-item matrix) where 0 means no rating
ratings = np.random.randint(0, 6, size=(num_users, num_items))  # Ratings between 0 to 5
ratings[ratings < 3] = 0  # Set ratings less than 3 to 0 to simulate missing ratings

# Generate a dummy prediction matrix for the purpose of evaluation
predictions = np.random.uniform(1, 5, size=(num_users, num_items))

# Flatten the matrices for evaluation
# Mask out entries with 0 rating (assuming they are unrated)
rated_indices = ratings > 0
true_ratings = ratings[rated_indices]
pred_ratings = predictions[rated_indices]

# Calculate evaluation metrics
mse = mean_squared_error(true_ratings, pred_ratings)
mae = mean_absolute_error(true_ratings, pred_ratings)

# Print results
print("Mean Squared Error (MSE):", mse)
print("Mean Absolute Error (MAE):", mae)

Mean Squared Error (MSE): 3.040845253294991
Mean Absolute Error (MAE): 1.4648213766228029


In [2]:
# Parameters
k = 3  # Define the cutoff rank (top-k)

# Dummy data: ground truth relevant items for each user
# Each user has a list of items they interacted with (considered relevant)
ground_truth = {
    0: [1, 3, 4],
    1: [0, 2],
    2: [1, 4],
    3: [3],
    4: [0, 2, 4],
    5: [2, 3],
    6: [1],
    7: [0, 1, 2],
    8: [4],
    9: [1, 3]
}

# Dummy data: recommended items for each user
# Each user has a list of recommended items (sorted by relevance)
recommendations = {
    0: [3, 1, 4, 2, 0],
    1: [2, 0, 3, 4, 1],
    2: [1, 0, 4, 3, 2],
    3: [3, 2, 1, 0, 4],
    4: [0, 4, 2, 3, 1],
    5: [3, 2, 0, 1, 4],
    6: [1, 4, 3, 2, 0],
    7: [0, 1, 2, 3, 4],
    8: [4, 3, 2, 1, 0],
    9: [1, 3, 4, 0, 2]
}

# Precision@k and Recall@k
def precision_at_k(recommended, relevant, k):
    recommended_k = recommended[:k]
    relevant_k = set(recommended_k) & set(relevant)
    return len(relevant_k) / k

def recall_at_k(recommended, relevant, k):
    recommended_k = recommended[:k]
    relevant_k = set(recommended_k) & set(relevant)
    return len(relevant_k) / len(relevant) if relevant else 0

# Mean Average Precision@k
def average_precision_at_k(recommended, relevant, k):
    relevant_k = set(relevant)
    score = 0.0
    num_hits = 0
    for i, item in enumerate(recommended[:k]):
        if item in relevant_k:
            num_hits += 1
            score += num_hits / (i + 1)
    return score / min(len(relevant), k) if relevant else 0

# Mean Reciprocal Rank@k
def reciprocal_rank_at_k(recommended, relevant, k):
    for i, item in enumerate(recommended[:k]):
        if item in relevant:
            return 1 / (i + 1)
    return 0

# Calculate metrics for all users and average them
precision_scores = []
recall_scores = []
map_scores = []
mrr_scores = []

for user in ground_truth.keys():
    relevant_items = ground_truth[user]
    recommended_items = recommendations[user]

    precision_scores.append(precision_at_k(recommended_items, relevant_items, k))
    recall_scores.append(recall_at_k(recommended_items, relevant_items, k))
    map_scores.append(average_precision_at_k(recommended_items, relevant_items, k))
    mrr_scores.append(reciprocal_rank_at_k(recommended_items, relevant_items, k))

# Averaging the scores
mean_precision = np.mean(precision_scores)
mean_recall = np.mean(recall_scores)
mean_map = np.mean(map_scores)
mean_mrr = np.mean(mrr_scores)

# Print the results
print(f"Precision@{k}: {mean_precision:.4f}")
print(f"Recall@{k}: {mean_recall:.4f}")
print(f"MAP@{k}: {mean_map:.4f}")
print(f"MRR@{k}: {mean_mrr:.4f}")


Precision@3: 0.6667
Recall@3: 1.0000
MAP@3: 0.9833
MRR@3: 1.0000


Task to do: Build a movie recommendation system and evaluate it by the above defined parameters.


---
---



---




In [6]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.metrics.pairwise import cosine_similarity

In [7]:
file_path = '/content/ratings_movies.csv'
data = pd.read_csv(file_path)

In [8]:
# Create a user-item matrix
user_item_matrix = data.pivot(index='userId', columns='movieId', values='rating').fillna(0)

In [9]:
# Compute cosine similarity between users
user_similarity = cosine_similarity(user_item_matrix)
np.fill_diagonal(user_similarity, 0)  # Set diagonal to 0 to ignore self-similarity


In [10]:
# Generate predictions by averaging ratings of similar users (weighted by similarity)
predictions = user_similarity.dot(user_item_matrix) / np.array([np.abs(user_similarity).sum(axis=1)]).T

# Mask actual ratings (to simulate the test scenario) and evaluate
rated_indices = user_item_matrix > 0  # Indices of actual ratings
true_ratings = user_item_matrix.values[rated_indices]
pred_ratings = predictions[rated_indices]

In [11]:
# Calculate MSE and MAE
mse = mean_squared_error(true_ratings, pred_ratings)
mae = mean_absolute_error(true_ratings, pred_ratings)


In [12]:
print("Mean Squared Error (MSE):", mse)
print("Mean Absolute Error (MAE):", mae)

Mean Squared Error (MSE): 9.72605155620528
Mean Absolute Error (MAE): 2.938697599228125


In [13]:
# Parameters for Precision@k, Recall@k, MAP@k, MRR@k
k = 7


In [14]:
# Generate top-k recommendations for each user
top_k_recommendations = np.argsort(-predictions, axis=1)[:, :k]

In [15]:
print(top_k_recommendations)

[[ 257  314  277 ...  224  510  897]
 [ 277 1938  314 ... 6693  257 7355]
 [ 224  897 1938 ...  510  910  257]
 ...
 [ 314  277  257 ...  510  224 2224]
 [ 314  277  257 ...  510  123  418]
 [ 277 1938  314 ...  224 2224  510]]


In [16]:
# Create ground truth relevant items (non-zero actual ratings)
ground_truth = {i: np.where(user_item_matrix.iloc[i] > 0)[0].tolist() for i in range(user_item_matrix.shape[0])}

In [17]:
def precision_at_k(recommended, relevant, k):
    recommended_k = recommended[:k]
    relevant_k = set(recommended_k) & set(relevant)
    return len(relevant_k) / k

In [18]:
def recall_at_k(recommended, relevant, k):
    recommended_k = recommended[:k]
    relevant_k = set(recommended_k) & set(relevant)
    return len(relevant_k) / len(relevant) if relevant else 0

In [19]:
def average_precision_at_k(recommended, relevant, k):
    relevant_k = set(relevant)
    score = 0.0
    num_hits = 0
    for i, item in enumerate(recommended[:k]):
        if item in relevant_k:
            num_hits += 1
            score += num_hits / (i + 1)
    return score / min(len(relevant), k) if relevant else 0

In [20]:
def reciprocal_rank_at_k(recommended, relevant, k):
    for i, item in enumerate(recommended[:k]):
        if item in relevant:
            return 1 / (i + 1)
    return 0

In [21]:
# Evaluate metrics for all users
precision_scores = []
recall_scores = []
map_scores = []
mrr_scores = []

for user in range(user_item_matrix.shape[0]):
    relevant_items = ground_truth[user]
    recommended_items = top_k_recommendations[user]

    precision_scores.append(precision_at_k(recommended_items, relevant_items, k))
    recall_scores.append(recall_at_k(recommended_items, relevant_items, k))
    map_scores.append(average_precision_at_k(recommended_items, relevant_items, k))
    mrr_scores.append(reciprocal_rank_at_k(recommended_items, relevant_items, k))

In [22]:
# Calculate averages
mean_precision = np.mean(precision_scores)
mean_recall = np.mean(recall_scores)
mean_map = np.mean(map_scores)
mean_mrr = np.mean(mrr_scores)

print(f"Precision@{k}: {mean_precision:.4f}")
print(f"Recall@{k}: {mean_recall:.4f}")
print(f"MAP@{k}: {mean_map:.4f}")
print(f"MRR@{k}: {mean_mrr:.4f}")

Precision@7: 0.5974
Recall@7: 0.0603
MAP@7: 0.5135
MRR@7: 0.8366
