In [1]:
import numpy as np
import pandas as pd
import csv
import math


In [7]:
# 加载数据
movies = pd.read_csv("datasets/ml-25m/movies.csv")
links = pd.read_csv("datasets/ml-25m/links.csv")
tags = pd.read_csv("datasets/ml-25m/tags.csv")
from scipy.sparse import csr_matrix
ratings = pd.read_csv("datasets/ml-25m/ratings.csv")
# 将电影ID和用户ID转换为连续索引
user_ids = ratings['userId'].unique()
movie_ids = ratings['movieId'].unique()
user_id_map = {id_: idx for idx, id_ in enumerate(user_ids)}
movie_id_map = {id_: idx for idx, id_ in enumerate(movie_ids)}
ratings['userId'] = ratings['userId'].map(user_id_map)
ratings['movieId'] = ratings['movieId'].map(movie_id_map)
# 创建用户-电影交互矩阵
num_users = len(user_ids)
num_movies = len(movie_ids)
interaction_matrix = csr_matrix((ratings['rating'], (ratings['userId'], ratings['movieId'])), shape=(num_users, num_movies))
print("Interaction matrix shape:", interaction_matrix.shape)

# 将矩阵转换为DataFrame
interaction_df = pd.DataFrame.sparse.from_spmatrix(interaction_matrix, index=user_ids, columns=movie_ids)

In [14]:
import numpy as np

def get_top_k_recommendations(user_ratings, k):
    # 根据评分生成推荐列表，这里假设评分已经按降序排列
    return np.argsort(user_ratings)[::-1][:k]

def recall_at_k(recommended_items, relevant_items, k):
    recommended_items_at_k = recommended_items[:k]
    relevant_items_set = set(relevant_items)
    hits = sum(1 for item in recommended_items_at_k if item in relevant_items_set)
    return hits / len(relevant_items_set)

def dcg_at_k(recommended_items, relevant_items, k):
    relevance_scores = [1 if item in relevant_items else 0 for item in recommended_items[:k]]
    return sum(score / np.log2(idx + 2) for idx, score in enumerate(relevance_scores))

def ndcg_at_k(recommended_items, relevant_items, k):
    dcg_max = dcg_at_k(sorted(relevant_items, key=lambda x: 1, reverse=True), relevant_items, k)
    if not dcg_max:
        return 0.
    return dcg_at_k(recommended_items, relevant_items, k) / dcg_max

def hit_ratio_at_k(recommended_items, relevant_items, k):
    return int(any(item in recommended_items[:k] for item in relevant_items))

# Example usage:
user_ratings = np.array([4.0, 3.5, 5.0, 2.0, 4.5])
# 获取前3个推荐物品的索引
recommended_items = get_top_k_recommendations(user_ratings, k=3)
relevant_items = [2, 4]  # 用户实际喜欢的物品索引

k = 3
print(f"Recall@{k}: {recall_at_k(recommended_items, relevant_items, k)}")
print(f"NDCG@{k}: {ndcg_at_k(recommended_items, relevant_items, k)}")
print(f"Hit Ratio@{k}: {hit_ratio_at_k(recommended_items, relevant_items, k)}")

Recall@3: 1.0
NDCG@3: 1.0
Hit Ratio@3: 1


In [15]:
def evaluate_recommendation_system(model, test_data, threshold, k):
    recall_scores = []
    ndcg_scores = []
    hit_ratio_scores = []

    for user in test_data:
        user_ratings = model.predict(user)  # 根据模型预测用户的评分
        recommended_items = get_top_k_recommendations(user_ratings, k)
        relevant_items = [item for item, rating in test_data[user] if rating >= threshold]  # 根据评分阈值确定用户喜欢的物品
        if not relevant_items:
            continue
        recall_scores.append(recall_at_k(recommended_items, relevant_items, k))
        ndcg_scores.append(ndcg_at_k(recommended_items, relevant_items, k))
        hit_ratio_scores.append(hit_ratio_at_k(recommended_items, relevant_items, k))

    avg_recall = np.mean(recall_scores)
    avg_ndcg = np.mean(ndcg_scores)
    avg_hit_ratio = np.mean(hit_ratio_scores)

    return {
        'Recall@K': avg_recall,
        'NDCG@K': avg_ndcg,
        'Hit Ratio@K': avg_hit_ratio
    }

# Example usage:
# 模型预测函数需要实现
model = ...  # 你的推荐模型
test_data = {
    'user1': [(1, 4.0), (3, 5.0), (5, 4.5)],  # 测试集中每个用户的物品评分
    'user2': [(2, 4.0), (4, 4.5), (6, 5.0)]
}
threshold = 4.0  # 设定评分阈值
k = 3
metrics = evaluate_recommendation_system(model, test_data, threshold, k)
print(metrics)

AttributeError: 'ellipsis' object has no attribute 'predict'