Surprise库的SVD实现不直接支持增量训练，不过可以通过其他方法实现类似效果。我们可以使用逐步更新模型的方式来模拟增量训练。

In [2]:
import numpy as np
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split
from surprise import accuracy
import pandas as pd
from collections import defaultdict
import random

reader = Reader(line_format='user item rating', sep='::')
# 从文件加载数据
# rating_file = os.path.join('file', 'output.dat')
rating_file = '../file/output3.dat'

# 读取数据
data = Dataset.load_from_file(rating_file, reader=reader)


# 数据采样：随机抽取一定比例的数据（例如，10%）
def sample_data(data, sample_size=0.1):
    full_data = data.raw_ratings
    sample_size = int(len(full_data) * sample_size)
    sampled_data = random.sample(full_data, sample_size)
    return Dataset.load_from_df(pd.DataFrame(sampled_data, columns=['user', 'item', 'rating', 'timestamp']), reader)


# 增量训练
def incremental_train(data, batch_size=1000, n_factors=40, lr_all=0.007, reg_all=0.02):
    # 初始化模型
    algo = SVD(n_factors=n_factors, lr_all=lr_all, reg_all=reg_all)

    # 将数据分割为多个小批次
    full_data = data.raw_ratings
    random.shuffle(full_data)
    batches = [full_data[i:i + batch_size] for i in range(0, len(full_data), batch_size)]

    for batch in batches:
        batch_data = Dataset.load_from_df(pd.DataFrame(batch, columns=['user', 'item', 'rating']), reader)
        trainset = batch_data.build_full_trainset()
        algo.fit(trainset)

    return algo


# 数据采样
sampled_data = sample_data(data, sample_size=0.1)

# 数据划分
trainset, testset = train_test_split(sampled_data, test_size=0.25)

# 增量训练模型
algo = incremental_train(sampled_data, batch_size=1000, n_factors=40, lr_all=0.007, reg_all=0.02)

# 在测试集上进行预测并评估模型
predictions = algo.test(testset)
accuracy.rmse(predictions)


# 推荐函数
def get_top_n_recommendations(predictions, n=10):
    """为每个用户推荐前N个产品"""
    top_n = defaultdict(list)
    for uid, iid, true_r, est, _ in predictions:
        top_n[uid].append((iid, est))

    for uid, user_ratings in top_n.items():
        user_ratings.sort(key=lambda x: x[1], reverse=True)
        top_n[uid] = user_ratings[:n]
    return top_n


# 在全训练集上进行预测
train_predictions = algo.test(trainset.build_testset())

ValueError: too many values to unpack (expected 3)

In [0]:
# 获取推荐结果
user_id = '123'
top_n = get_top_n_recommendations(train_predictions, n=10)

# 打印推荐结果
for uid, user_ratings in top_n.items():
    if uid == user_id:
        print(f"User ——> {uid}:")
        for (iid, est) in user_ratings:
            print(f"  推荐 {iid}: 预测评分 {est}")
