<a href="https://colab.research.google.com/github/felixxuu/Illinois_2025/blob/main/Tri_model_0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

install Pytorch Geometric



In [32]:
!pip install torch torch_geometric

Collecting torch_geometric
  Downloading torch_geometric-2.7.0-py3-none-any.whl.metadata (63 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/63.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.7/63.7 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
Downloading torch_geometric-2.7.0-py3-none-any.whl (1.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m21.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: torch_geometric
Successfully installed torch_geometric-2.7.0


# **TEST:基础三元GNN模型**

 **导入库**

In [33]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import numpy as np

**定义数据集类**

In [34]:
class TripleDataset(Dataset):
  """知识图谱三元数据集"""
  def __init__(self, triples):
    self.triples = torch.LongTensor(triples)

  def __len__(self):
    return len(self.triples)

  def __getitem__(self, index):
    return self.triples[index]

**定义TransE模型**

In [35]:
class TransE(nn.Module):
  def __init__(self, num_entities, num_relations, embedding_dim=100,margin=1.0):
    super(TransE, self).__init__()

    self.num_entities = num_entities
    self.num_relations = num_relations
    self.embedding_dim = embedding_dim
    self.margin = margin

    # entity & relation embedding
    self.entity_embeddings = nn.Embedding(num_entities, embedding_dim)
    self.relation_embeddings = nn.Embedding(num_relations, embedding_dim)

    # initialization
    self._initialize_embeddings()

  def _initialize_embeddings(self):
    nn.init.xavier_uniform_(self.entity_embeddings.weight.data)
    nn.init.xavier_uniform_(self.relation_embeddings.weight.data)
    self.entity_embeddings.weight.data = F.normalize(self.entity_embeddings.weight.data, p=2, dim=1)

  def forward(self, heads, relations, tails):
    # 获取嵌入
    h = self.entity_embeddings(heads)
    r = self.relation_embeddings(relations)
    t = self.entity_embeddings(tails)

    # 归一化
    h = F.normalize(h, p = 2, dim = 1)
    t = F.normalize(t, p = 2, dim = 1)

    # 计算距离
    score = torch.norm(h + r -t, p = 2 , dim = 1)

    return score

  def generate_negative_samples(self, heads,relations,tails):
    batch_size = heads.size(0)
    device = heads.device

    mask = torch.rand(batch_size, device = device) < 0.5
    random_entities = torch.randint(0, self.num_entities, (batch_size,),device = device)

    neg_heads = torch.where(mask, random_entities, heads)
    neg_tails = torch.where(mask, random_entities, tails)
    neg_relations = relations.clone()

    return neg_heads, neg_relations, neg_tails

  def loss(self, pos_heads, pos_relations, pos_tails):
     """计算 margin-based ranking loss"""
     pos_scores = self.forward(pos_heads, pos_relations, pos_tails)
     neg_heads, neg_relations, neg_tails = self.generate_negative_samples(
         pos_heads,pos_relations,pos_tails
     )
     neg_scores = self.forward(neg_heads, neg_relations, neg_tails)
     loss = torch.mean(F.relu(self.margin + pos_scores - neg_scores))
     return loss


**准备数据**

In [36]:
# 设置随机种子
torch.manual_seed(42)
np.random.seed(42)

# Example: 三元组数据
triples = [
    (0,0,1),(1,1,2),(2,0,3),(0,2,2),
    (1,0,3),(3,1,0),(2,2,1),(0,1,3),
    (3,0,0),(2,1,0),(1,2,3),(0,0,3)
]
# 创建数据集
dataset = TripleDataset(triples)

dataloader = DataLoader(dataset, batch_size=4, shuffle=True)

**初始化模型和优化器**

In [37]:
# 模型参数
num_entities = 4
num_relations = 3
embedding_dim = 50

# 创建模型
model = TransE(num_entities, num_relations, embedding_dim, margin=1.0)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

**训练模型**

In [38]:
num_epochs = 100

print("start training")

for epoch in range(num_epochs):
    total_loss = 0
    for batch in dataloader:
        heads = batch[:, 0]
        relations = batch[:, 1]
        tails = batch[:, 2]
        loss = model.loss(heads,relations, tails)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

  # entities embedding normalization
        model.entity_embeddings.weight.data = F.normalize(
        model.entity_embeddings.weight.data, p = 2, dim = 1)

        total_loss += loss.item()

if (epoch + 1) % 10 == 0:
  print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}")

print("End")


start training
Epoch [100/100], Loss: 0.5000
End


**测试模型**

In [39]:
model.eval()
test_triples = [
    (0,0,1,"positive"),
    (0,0,2,"negative"),
    (1,1,2,"negative"),
    (1,1,0,"positive")
]

print("\nTest results:")
with torch.no_grad():
  for h, r, t, desc in test_triples:
    score = model(torch.LongTensor([h]),torch.LongTensor([r]), torch.LongTensor([t]))
    print(f"{desc}({h},{r},{t}):{score.item():.4f}")


Test results:
positive(0,0,1):2.5294
negative(0,0,2):3.2296
negative(1,1,2):2.6634
positive(1,1,0):2.0873
