离线训练（定期执行）：

In [4]:
import numpy as np
import pandas as pd
import redis
import json
from surprise import SVD, Dataset, Reader
from surprise.model_selection import train_test_split
from surprise import Dataset, Reader, KNNBasic, SVD

In [7]:
# 假设我们有一个包含 'user_id', 'item_id', 'rating' 的 DataFrame: ratings_df

def train_svd_model(ratings_df, n_factors=50):
    reader = Reader(rating_scale=(1, 5))
    data = Dataset.load_from_df(ratings_df[['user_id', 'item_id', 'rating']], reader)

    # 使用整个数据集进行训练
    trainset = data.build_full_trainset()

    # 训练SVD模型
    algo = SVD(n_factors=n_factors)
    algo.fit(trainset)

    return algo

In [6]:
# 从文件加载数据
# rating_file = os.path.join('file', 'output.dat')
rating_file = '../file/output2.dat'
reader = Reader(line_format='user item rating', sep='::')
data = Dataset.load_from_file(rating_file, reader=reader)
# 将Surprise数据集转换为pandas DataFrame
df = pd.DataFrame(data.raw_ratings, columns=['user_id', 'item_id', 'rating', 'timestamp'])

# 如果不需要timestamp，可以删除它
df = df.drop('timestamp', axis=1)

# 确保user_id和item_id是整数类型，rating是浮点类型
df['user_id'] = df['user_id'].astype(int)
df['item_id'] = df['item_id'].astype(int)
df['rating'] = df['rating'].astype(float)
# 现在df就是我们需要的ratings_df
ratings_df = df

print(ratings_df.head())

    user_id  item_id  rating
0  10674015    15509    1.55
1  10674015    15505    1.55
2  10674015    26058    1.55
3  11365761    17107    2.38
4  11365761    32456    2.38


In [None]:
# 训练模型
svd_model = train_svd_model(ratings_df)


# 为所有用户生成推荐
def generate_recommendations(svd_model, ratings_df, n_items=100):
    unique_users = ratings_df['user_id'].unique()
    unique_items = ratings_df['item_id'].unique()

    recommendations = {}
    for user in unique_users:
        user_predictions = [
            (item, svd_model.predict(user, item).est)
            for item in unique_items
        ]
        user_predictions.sort(key=lambda x: x[1], reverse=True)
        recommendations[user] = user_predictions[:n_items]

    return recommendations


# 生成推荐
all_recommendations = generate_recommendations(svd_model, ratings_df)

# 将结果存储到Redis
r = redis.Redis(host='192.168.22.238', port=6379, db=0)

for user, recs in all_recommendations.items():
    r.set(f"user:{user}:recommendations", json.dumps(recs))

print("SVD model trained and recommendations stored.")

In [9]:
for user, recs in all_recommendations.items():
    r.set(f"user:{user}:recommendations", recs)

DataError: Invalid input of type: 'list'. Convert to a bytes, string, int or float first.

在线推荐服务（使用 FastAPI）：

In [ ]:
from fastapi import FastAPI, HTTPException
import redis
import json

app = FastAPI()
r = redis.Redis(host='localhost', port=6379, db=0)


@app.get("/recommend/{user_id}")
async def get_recommendations(user_id: int):
    # 从Redis获取推荐
    recommendations = r.get(f"user:{user_id}:recommendations")

    if recommendations:
        recommendations = json.loads(recommendations)
        # 只返回物品ID
        item_ids = [item[0] for item in recommendations[:10]]
        return {"user_id": user_id, "recommendations": item_ids}
    else:
        raise HTTPException(status_code=404, detail="No recommendations found for this user")


if __name__ == '__main__':
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)

定期更新脚本（保持不变）：

In [ ]:
import schedule
import time


def update_svd_model():
    # 这里放置更新模型的代码
    # 可以是上面的离线训练代码
    print("Updating SVD model...")
    # train_svd_model(...)


# 设置定期更新，例如每天凌晨2点
schedule.every().day.at("02:00").do(update_svd_model)

while True:
    schedule.run_pending()
    time.sleep(1)

In [1]:
# ! pip install schedule

Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple
Collecting schedule
  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/20/a7/84c96b61fd13205f2cafbe263cdb2745965974bdf3e0078f121dfeca5f02/schedule-1.2.2-py3-none-any.whl (12 kB)
Installing collected packages: schedule
Successfully installed schedule-1.2.2

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.0[0m[39;49m -> [0m[32;49m24.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
