# In [None]:
# 导入必要的库
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, recall_score, f1_score
from sklearn.preprocessing import OneHotEncoder, MinMaxScaler
from sklearn.neighbors import NearestNeighbors

# Load the raw inputs: a user behaviour log and per-content metadata.
# Per the original notes: user ID, content ID, behaviour type (like / comment / favorite), timestamp, ...
user_behavior = pd.read_csv('user_behavior.csv')
# Per the original notes: content ID, like / comment / favorite counts, topic category, ...
content_data = pd.read_csv('content_data.csv')

# Data preprocessing
# One-hot encode the topic category.
encoder = OneHotEncoder()
content_data_encoded = encoder.fit_transform(content_data[['category']])

# Min-max scale the numeric engagement counters.
scaler = MinMaxScaler()
content_data_scaled = scaler.fit_transform(content_data[['likes', 'comments', 'favorites']])

# Combine scaled counters and one-hot columns into one content feature table.
content_features = pd.concat([pd.DataFrame(content_data_scaled), pd.DataFrame(content_data_encoded.toarray())], axis=1)

# User-based collaborative filtering
# Build the user x content matrix. `pivot_table` is used instead of `pivot`
# because `pivot` raises ValueError as soon as one (user_id, content_id) pair
# appears more than once, which a behaviour log can contain (e.g. a user both
# likes and comments on the same item). Repeated pairs are summed.
# NOTE(review): assumes 'interaction' is a numeric strength column - confirm.
user_content_matrix = user_behavior.pivot_table(
    index='user_id',
    columns='content_id',
    values='interaction',
    aggfunc='sum',
    fill_value=0,
)

# Fit the KNN model; each sample is one user's row of the matrix.
knn = NearestNeighbors(metric='cosine', algorithm='brute')
knn.fit(user_content_matrix)


# Recommend content for a user that is present in the user-content matrix.
def recommend_for_user(user_id, num_recommendations=10):
    """Recommend content IDs for ``user_id`` from similar users' interactions.

    The module-level ``knn`` model is fitted on the rows of
    ``user_content_matrix``, so the neighbours it returns are *users*.
    Each candidate content item is scored by the similarity-weighted sum of
    the neighbours' interaction values. Items the target user already has a
    non-zero interaction with are excluded.

    Args:
        user_id: Label of the user in ``user_content_matrix.index``.
        num_recommendations: Maximum number of content IDs to return. Up to
            this many other users are consulted as neighbours.

    Returns:
        A list of content IDs (column labels of ``user_content_matrix``),
        best first. May be shorter than ``num_recommendations``, or empty,
        when the neighbours offer too few unseen items.

    Raises:
        KeyError: If ``user_id`` is not in ``user_content_matrix``.
    """
    if num_recommendations <= 0:
        return []

    user_row = user_content_matrix.loc[user_id]
    target_position = user_content_matrix.index.get_loc(user_id)

    # Ask for one extra neighbour because the query user is itself a fitted
    # sample; never request more neighbours than there are users.
    n_neighbors = min(num_recommendations + 1, len(user_content_matrix))
    distances, indices = knn.kneighbors(
        user_row.values.reshape(1, -1), n_neighbors=n_neighbors
    )

    # Drop the query user by position rather than assuming it is the first
    # hit (ties between identical rows can reorder the results).
    neighbor_positions = indices.ravel()
    keep = neighbor_positions != target_position
    neighbor_positions = neighbor_positions[keep]
    if len(neighbor_positions) == 0:
        return []

    # The model uses cosine distance, so similarity = 1 - distance.
    weights = 1.0 - distances.ravel()[keep]

    neighbor_rows = user_content_matrix.iloc[neighbor_positions]
    scores = neighbor_rows.mul(weights, axis=0).sum(axis=0)

    # Keep only items the user has not interacted with and that at least one
    # neighbour supports.
    unseen = user_row == 0
    candidates = scores[unseen & (scores > 0)]
    return candidates.nlargest(num_recommendations).index.tolist()


# Example: recommend content for the user whose ID is 1.
recommended_content = recommend_for_user(user_id=1)
print(f"推荐内容ID: {recommended_content}")

# In [ ]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import TruncatedSVD
import numpy as np

# Load the three input tables (content, users, interactions), in that order.
content_data, user_data, interaction_data = (
    pd.read_csv(csv_path)
    for csv_path in ('content_data.csv', 'user_data.csv', 'interaction_data.csv')
)


# Data preprocessing
def preprocess_data(content_data, user_data, interaction_data):
    """Placeholder for data cleaning, missing-value handling, one-hot encoding, etc.

    Not implemented yet: the inputs are left untouched and ``None`` is
    returned.
    """


# Feature extraction
def extract_features(content_data):
    """Return the TF-IDF matrix of the ``'content'`` column.

    A fresh ``TfidfVectorizer`` with default settings is fitted on
    ``content_data['content']``. Only the transformed matrix is returned;
    the fitted vectorizer is discarded.
    """
    return TfidfVectorizer().fit_transform(content_data['content'])


# User profile construction
def build_user_profile(user_data, interaction_data):
    """Map every user to the list of content IDs they interacted with.

    Args:
        user_data: DataFrame with a ``'user_id'`` column; one profile entry
            is produced per value in this column.
        interaction_data: DataFrame with ``'user_id'`` and ``'content_id'``
            columns, one row per interaction.

    Returns:
        A dict ``{user_id: [content_id, ...]}``. Content IDs keep the row
        order of ``interaction_data``. Users without interactions map to an
        empty list.
    """
    # Group the interaction log once instead of re-filtering the whole table
    # with a boolean mask for every user.
    history_by_user = (
        interaction_data.groupby('user_id')['content_id'].agg(list).to_dict()
    )
    return {
        user: history_by_user.get(user, [])
        for user in user_data['user_id']
    }


# Recommendation algorithm
def recommend(user_id, user_profiles, content_features):
    """Rank all content rows by similarity to a user's interaction history.

    The user is represented by the mean feature vector of the rows listed in
    their profile; every row of ``content_features`` is then ranked by
    cosine similarity to that vector.

    Args:
        user_id: Key to look up in ``user_profiles``.
        user_profiles: Dict mapping user IDs to lists of row selectors into
            ``content_features``.
            NOTE(review): the profile entries are used directly as row
            positions, so this assumes content IDs equal row indices of
            ``content_features`` - confirm against the data.
        content_features: 2-D feature matrix (dense or SciPy sparse), one
            row per content item.

    Returns:
        ``[]`` when the user has no profile. Otherwise an integer array of
        shape ``(1, n_rows)`` holding row indices of ``content_features``
        ordered from most to least similar.
    """
    user_profile = user_profiles.get(user_id, [])
    if not user_profile:
        return []

    # Averaging a SciPy sparse matrix yields an ``np.matrix``, which recent
    # scikit-learn versions refuse; coerce to a plain (1, n_features) array.
    profile_vector = np.asarray(
        content_features[user_profile].mean(axis=0)
    ).reshape(1, -1)
    similarity = cosine_similarity(profile_vector, content_features)

    # `similarity` has shape (1, n_rows). Reverse along the column axis: a
    # bare `[::-1]` would only flip the single row and leave the ranking
    # ascending (least similar first).
    recommended_content = np.argsort(similarity, axis=1)[:, ::-1]
    return recommended_content


# Step 1: run the preprocessing hook on the loaded tables.
preprocess_data(content_data, user_data, interaction_data)

# Step 2: build each user's interaction history.
user_profiles = build_user_profile(
    user_data=user_data,
    interaction_data=interaction_data,
)

# Step 3: extract the content feature matrix.
content_features = extract_features(content_data=content_data)

# Step 4: produce and print recommendations for one user.
user_id = 'some_user_id'
recommendations = recommend(
    user_id=user_id,
    user_profiles=user_profiles,
    content_features=content_features,
)
print(f"Recommendations for user {user_id}: {recommendations}")
