# 电影推荐算法比较实验

本实验实现并比较两种基于协同过滤的电影推荐算法：User-CF（基于用户）和 Item-CF（基于物品），并以 RMSE 评估两者的预测效果。

## 一、数据加载与预处理

In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import mean_squared_error
import kagglehub

# Fetch the dataset through kagglehub and report where it was stored.
# NOTE(review): the returned path is only printed; the reads below use the
# relative 'data/' directory instead -- confirm this is intended.
path = kagglehub.dataset_download("/data")

print("Path to dataset files:", path)

# Read the tab-separated train and test rating tables.
train_data, test_data = (
    pd.read_csv(ratings_file, sep='\t')
    for ratings_file in (
        'data/recommendation-ratings-train.txt',
        'data/recommendation-ratings-test.txt',
    )
)

# Reshape each table into a user x movie rating matrix; user/movie pairs
# with no rating are filled with 0.
# NOTE(review): 'userld' / 'movield' are spelled with a lowercase "l";
# presumably the file header says 'userId' / 'movieId' -- verify against
# the data before running.
user_item_train, user_item_test = (
    ratings.pivot_table(values='rating', index='userld', columns='movield', fill_value=0)
    for ratings in (train_data, test_data)
)


## 二、User-CF算法实现

In [None]:
def user_cf_predict(user_item, user_id, movie_id, k=50):
    """Predict a rating with user-based collaborative filtering.

    The prediction is the target user's mean rating plus the
    similarity-weighted average of how far each neighbour's rating of
    ``movie_id`` deviates from that neighbour's own mean rating.

    Args:
        user_item: DataFrame with users as rows and movies as columns.
            A cell value of 0 (or NaN) means "not rated".
        user_id: Row label of the user to predict for.
        movie_id: Column label of the movie to predict.
        k: Maximum number of neighbours to use.

    Returns:
        The predicted rating as a float. Fallbacks when the neighbourhood
        formula cannot be applied:
        * unknown ``user_id``: mean of all observed ratings;
        * unknown ``movie_id``, or no positively-similar user has rated the
          movie: the target user's mean observed rating.
    """
    # Treat NaN like 0 so that both encodings of "not rated" behave the same.
    ratings = np.nan_to_num(user_item.to_numpy(dtype=float))
    rated = ratings != 0
    global_mean = float(ratings[rated].mean()) if rated.any() else float("nan")

    if user_id not in user_item.index:
        return global_mean
    user_pos = user_item.index.get_loc(user_id)

    # Per-user mean over observed ratings only; the 0 placeholders add
    # nothing to the sum and are excluded from the count.
    rated_counts = rated.sum(axis=1)
    user_means = np.divide(
        ratings.sum(axis=1),
        rated_counts,
        out=np.full(len(rated_counts), global_mean, dtype=float),
        where=rated_counts > 0,
    )
    target_mean = float(user_means[user_pos])

    if movie_id not in user_item.columns:
        return target_mean
    movie_pos = user_item.columns.get_loc(movie_id)

    # Cosine similarity between the target user and every user. Only this
    # one row is needed, so the full user x user matrix is never built.
    norms = np.linalg.norm(ratings, axis=1)
    norms[norms == 0] = 1.0  # all-zero rows get similarity 0, not NaN
    similarity = ratings @ ratings[user_pos] / (norms * norms[user_pos])
    similarity[user_pos] = 0.0  # a user is not their own neighbour

    # Only users who actually rated the movie can contribute a deviation.
    candidates = np.flatnonzero(rated[:, movie_pos] & (similarity > 0))
    order = np.argsort(-similarity[candidates], kind="stable")[:k]
    neighbours = candidates[order]
    if neighbours.size == 0:
        return target_mean

    weights = similarity[neighbours]
    deviations = ratings[neighbours, movie_pos] - user_means[neighbours]
    return float(target_mean + np.dot(weights, deviations) / np.abs(weights).sum())


## 三、Item-CF算法实现

In [None]:
def item_cf_predict(user_item, user_id, movie_id, k=50):
    """Predict a rating with item-based collaborative filtering.

    The prediction is the similarity-weighted average of the ratings the
    user gave to the movies most similar to ``movie_id``.

    Args:
        user_item: DataFrame with users as rows and movies as columns.
            A cell value of 0 (or NaN) means "not rated".
        user_id: Row label of the user to predict for.
        movie_id: Column label of the movie to predict.
        k: Maximum number of neighbouring movies to use.

    Returns:
        The predicted rating as a float. Fallbacks when the neighbourhood
        formula cannot be applied:
        * unknown ``user_id``: mean of all observed ratings;
        * unknown ``movie_id``, or the user has rated no positively-similar
          movie: the user's mean observed rating.
    """
    # Treat NaN like 0 so that both encodings of "not rated" behave the same.
    ratings = np.nan_to_num(user_item.to_numpy(dtype=float))
    rated = ratings != 0
    global_mean = float(ratings[rated].mean()) if rated.any() else float("nan")

    if user_id not in user_item.index:
        return global_mean
    user_pos = user_item.index.get_loc(user_id)
    user_row = ratings[user_pos]
    user_rated = rated[user_pos]
    user_mean = float(user_row[user_rated].mean()) if user_rated.any() else global_mean

    if movie_id not in user_item.columns:
        return user_mean
    movie_pos = user_item.columns.get_loc(movie_id)

    # Cosine similarity between the target movie and every movie. Only this
    # one row is needed, so the full item x item matrix is never built.
    norms = np.linalg.norm(ratings, axis=0)
    norms[norms == 0] = 1.0  # all-zero columns get similarity 0, not NaN
    similarity = ratings[:, movie_pos] @ ratings / (norms * norms[movie_pos])
    similarity[movie_pos] = 0.0  # a movie is not its own neighbour

    # Only movies the user actually rated may vote; an unrated movie's 0
    # placeholder would otherwise drag the average towards 0.
    candidates = np.flatnonzero(user_rated & (similarity > 0))
    order = np.argsort(-similarity[candidates], kind="stable")[:k]
    neighbours = candidates[order]
    if neighbours.size == 0:
        return user_mean

    weights = similarity[neighbours]
    return float(np.dot(weights, user_row[neighbours]) / np.abs(weights).sum())


## 四、RMSE评估与结果比较

In [None]:
# Score every test row with User-CF, then compute RMSE against the true ratings.
user_cf_preds = test_data.apply(
    lambda rec: user_cf_predict(user_item_train, rec['userld'], rec['movield']),
    axis=1,
)
user_cf_mse = mean_squared_error(test_data['rating'], user_cf_preds)
user_cf_rmse = np.sqrt(user_cf_mse)

# Same procedure for Item-CF.
item_cf_preds = test_data.apply(
    lambda rec: item_cf_predict(user_item_train, rec['userld'], rec['movield']),
    axis=1,
)
item_cf_mse = mean_squared_error(test_data['rating'], item_cf_preds)
item_cf_rmse = np.sqrt(item_cf_mse)

# Report both scores side by side (lower is better).
print(f'User-CF RMSE: {user_cf_rmse:.4f}')
print(f'Item-CF RMSE: {item_cf_rmse:.4f}')
