In [2]:
import pickle
from pathlib import Path

import networkx as nx
import pandas as pd
from node2vec import Node2Vec

# Load the interaction table; the code below reads the columns
# user, gender, age, location, item and rating from it.
coat_info_df = pd.read_csv('../../data/coat/coat_info.csv')

# Start from an empty undirected graph.
G = nx.Graph()

# ============ User nodes and user-attribute nodes ============
# (CSV column == node type == node-name prefix, edge relation), in insertion order.
attribute_relations = [
    ("gender", "has_gender"),
    ("age", "has_age"),
    ("location", "has_location"),
]

for _, record in coat_info_df.iterrows():
    # Parse every field up front so a malformed row fails before the graph is touched.
    uid = int(record['user'])
    attribute_values = [int(record[column]) for column, _relation in attribute_relations]

    # One node per distinct user.
    user_node = f"user_{uid}"
    if user_node not in G:
        G.add_node(user_node, type="user")

    # One shared node per distinct attribute value, linked to the user.
    for (column, relation), value in zip(attribute_relations, attribute_values):
        attribute_node = f"{column}_{value}"
        if attribute_node not in G:
            G.add_node(attribute_node, type=column)
        G.add_edge(user_node, attribute_node, relation=relation)

# ============ Item (coat) nodes and rating edges ============
for _, record in coat_info_df.iterrows():
    uid = int(record['user'])
    iid = int(record['item'])
    score = record['rating']

    # Every item becomes a node, even if nobody rated it highly.
    item_node = f"item_{iid}"
    if item_node not in G:
        G.add_node(item_node, type="item")

    # Only high ratings (>= 4) produce a user-item edge.
    if score >= 4:
        G.add_edge(f"user_{uid}", item_node, relation="rated", rating=score)

# Summarise the graph and show a small sample of nodes and edges.
sample_nodes = list(G.nodes(data=True))[:10]
sample_edges = list(G.edges(data=True))[:10]
print(f"图中的节点数：{G.number_of_nodes()}")
print(f"图中的边数：{G.number_of_edges()}")
print(f"节点示例：{sample_nodes}")
print(f"边示例：{sample_edges}")
# ============ Train node2vec embeddings (cached on disk) ============
# This step used to be disabled inside a string literal, yet a later cell
# loads "coat_model.model". On a fresh checkout that file does not exist, so
# the notebook could not run top to bottom. Training now runs only when the
# saved artifacts are missing; otherwise the existing files are reused.
MODEL_PATH = Path("coat_model.model")
EMBEDDINGS_PATH = Path("coat_embeddings.pkl")

if MODEL_PATH.exists() and EMBEDDINGS_PATH.exists():
    print("检测到已保存的模型和嵌入向量，跳过 node2vec 训练")
else:
    # Generate random walks over G (p = q = 1).
    walker = Node2Vec(G, dimensions=64, walk_length=30, num_walks=200, p=1, q=1, workers=4)

    # Fit the embedding model on the generated walks.
    model = walker.fit(window=10, min_count=1, batch_words=4)

    # Collect one embedding vector per graph node, keyed by node name.
    embeddings = {str(node): model.wv[str(node)] for node in G.nodes()}

    # Persist the artifacts before any inspection so a failed lookup
    # cannot throw away the training work.
    model.save(str(MODEL_PATH))
    print("模型已保存为 coat_model.model")

    with open(EMBEDDINGS_PATH, "wb") as f:
        pickle.dump(embeddings, f)
    print("嵌入向量已保存为 coat_embeddings.pkl")

    # Show a sample embedding when that node exists in the graph.
    if "user_1" in embeddings:
        print("user_1的嵌入：", embeddings["user_1"])
# Build (head, relation, tail) triples from the edges of a graph
def generate_triples(G):
    """Return one ``(u, relation, v)`` triple for every edge of ``G``.

    Args:
        G: A graph object exposing ``edges(data=True)`` that yields
            ``(u, v, attribute_dict)`` for each edge, as networkx graphs do.

    Returns:
        list[tuple]: Triples in the iteration order of ``G.edges``, where the
        middle element is the edge's ``relation`` attribute.

    Raises:
        KeyError: If an edge has no ``relation`` attribute.
    """
    # Only the edge data is needed; node attributes are never consulted, so
    # nodes without a 'type' attribute no longer cause a spurious KeyError.
    return [(u, data['relation'], v) for u, v, data in G.edges(data=True)]

# Convert every edge of G into a (head, relation, tail) triple
triples = generate_triples(G)

# Report how many triples were produced (one per edge)
print(f"图中的三元组个数：{len(triples)}")

图中的节点数：601
图中的边数：870
节点示例：[('user_21', {'type': 'user'}), ('gender_1', {'type': 'gender'}), ('age_0', {'type': 'age'}), ('location_1', {'type': 'location'}), ('user_31', {'type': 'user'}), ('age_2', {'type': 'age'}), ('user_173', {'type': 'user'}), ('gender_0', {'type': 'gender'}), ('age_3', {'type': 'age'}), ('user_114', {'type': 'user'})]
边示例：[('user_21', 'gender_1', {'relation': 'has_gender'}), ('user_21', 'age_0', {'relation': 'has_age'}), ('user_21', 'location_1', {'relation': 'has_location'}), ('gender_1', 'user_31', {'relation': 'has_gender'}), ('gender_1', 'user_114', {'relation': 'has_gender'}), ('gender_1', 'user_117', {'relation': 'has_gender'}), ('gender_1', 'user_79', {'relation': 'has_gender'}), ('gender_1', 'user_82', {'relation': 'has_gender'}), ('gender_1', 'user_8', {'relation': 'has_gender'}), ('gender_1', 'user_75', {'relation': 'has_gender'})]
图中的三元组个数：870


In [2]:
import json
from gensim.models import Word2Vec

# Load the saved embedding model from disk
model = Word2Vec.load("coat_model.model")
print("模型加载成功")

# Embeddings keyed by the new consecutive ids (one dict per node kind)
user_embeddings = {}
item_embeddings = {}

# Original node name -> new consecutive id, assigned in the order in which
# the nodes appear in model.wv.key_to_index
user_id_map = {}
item_id_map = {}

# Walk over every key in the model vocabulary and renumber users and items
for node in model.wv.key_to_index:
    if node.startswith("user_"):
        target_map, target_embeddings = user_id_map, user_embeddings
    elif node.startswith("item_"):
        target_map, target_embeddings = item_id_map, item_embeddings
    else:
        # Any other key (e.g. attribute nodes) is not exported
        continue

    # The next free id is simply the number of nodes mapped so far
    new_id = target_map.setdefault(node, len(target_map))
    target_embeddings[new_id] = model.wv[node].tolist()

# NOTE: json.dump writes the integer ids as string keys ("0", "1", ...),
# so consumers must convert them back with int() after loading.

# Save the user embeddings as JSON
with open("user_embeddings.json", "w") as user_file:
    json.dump(user_embeddings, user_file, indent=4)
print("重新编号的用户嵌入已保存为 user_embeddings.json")

# Save the item embeddings as JSON (the message now names the real file)
with open("item_embeddings.json", "w") as item_file:
    json.dump(item_embeddings, item_file, indent=4)
print("重新编号的物品嵌入已保存为 item_embeddings.json")

# Save the name -> id mappings for users and items
with open("user_id_map.json", "w") as user_map_file:
    json.dump(user_id_map, user_map_file, indent=4)
with open("item_id_map.json", "w") as item_map_file:
    json.dump(item_id_map, item_map_file, indent=4)
print("用户和物品的编号映射已分别保存为 user_id_map.json 和 item_id_map.json")


模型加载成功
重新编号的用户嵌入已保存为 user_embeddings.json
重新编号的物品嵌入已保存为 renumbered_item_embeddings.json
用户和物品的编号映射已分别保存为 user_id_map.json 和 item_id_map.json
