In [1]:
from collections import defaultdict
from surprise import Dataset
from surprise import SVD, model_selection, accuracy

In [2]:
data = Dataset.load_builtin('ml-100k')

In [33]:
def precision_recall_at_k(predictions, k=10, threshold=3.5):
    '''返回精度和召回率'''

    # 
    user_est_true_r = defaultdict(list)
    for prediction in predictions:
        user_est_true_r[prediction.uid].append((prediction.est, prediction.r_ui))
    # 
    precisions = dict()
    recalls = dict()
    for uid, user_ratings in user_est_true_r.items():
        # 按照预测分将序排列
        user_ratings.sort(key=lambda x: x[0], reverse=True)
        # 数据中真正相关的项目
        n_real = sum((true_r >= threshold) for (_, true_r) in user_ratings)
        
        # top-n推荐的item中大于阈值的
        n_rec_k = sum((est >= threshold) for (est, _) in user_ratings[:k])
        
        # 推荐的项目中相关的
        n_real_rec_k = sum(((est >= threshold) and (true_r >= threshold)) \
                            for (est, true_r) in user_ratings[:k])
#         print(prediction.uid)
        precisions[uid] = n_real_rec_k / n_rec_k if n_rec_k != 0 else 1
        recalls[uid] = n_real_rec_k / n_real if n_real != 0 else 1

    return precisions, recalls

In [35]:
algo = SVD()
kf = model_selection.KFold(n_splits=5)
for trainset, testset in kf.split(data):
    algo.fit(trainset)
    pred = algo.test(testset)
    precisions, recalls = precision_recall_at_k(pred, k=5, threshold=4)
    
    # 所有用户算平均值
    print('精度:', sum(prec for prec in precisions.values()) / len(precisions))
    print('召回率:', sum(rec for rec in recalls.values()) / len(recalls))

精度: 0.8625088464260434
召回率: 0.25590772886173047
精度: 0.8833864118895963
召回率: 0.2639979593214614
精度: 0.8756627783669136
召回率: 0.26283300056707987
精度: 0.8781139419674447
召回率: 0.2570607229071825
精度: 0.8612230470130782
召回率: 0.25488633592062754
