### Chatgpt 写的 CAGED小模型

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset

# 假设我们有一些数据
class KGDataset(Dataset):
    def __init__(self, triples):
        self.triples = triples

    def __len__(self):
        return len(self.triples)

    def __getitem__(self, idx):
        return self.triples[idx]

# 一个简单的编码器示例
class Encoder(nn.Module):
    def __init__(self, embedding_dim):
        super(Encoder, self).__init__()
        self.embedding = nn.Embedding(1000, embedding_dim)
        self.fc = nn.Linear(embedding_dim, embedding_dim)

    def forward(self, x):
        x = self.embedding(x)
        x = torch.relu(self.fc(x))
        return x

# CAGED模型
class CAGED(nn.Module):
    def __init__(self, embedding_dim):
        super(CAGED, self).__init__()
        self.encoder = Encoder(embedding_dim)
        self.criterion = nn.CrossEntropyLoss()

    def forward(self, pos_triples, neg_triples):
        pos_h, pos_r, pos_t = pos_triples[:, 0], pos_triples[:, 1], pos_triples[:, 2]
        neg_h, neg_r, neg_t = neg_triples[:, 0], neg_triples[:, 1], neg_triples[:, 2]

        pos_h_emb = self.encoder(pos_h)
        pos_r_emb = self.encoder(pos_r)
        pos_t_emb = self.encoder(pos_t)

        neg_h_emb = self.encoder(neg_h)
        neg_r_emb = self.encoder(neg_r)
        neg_t_emb = self.encoder(neg_t)

        pos_score = torch.sum(pos_h_emb * pos_r_emb * pos_t_emb, dim=1)
        neg_score = torch.sum(neg_h_emb * neg_r_emb * neg_t_emb, dim=1)

        scores = torch.cat([pos_score, neg_score], dim=0)
        labels = torch.cat([torch.ones(pos_score.size(0)), torch.zeros(neg_score.size(0))], dim=0).long()

        return scores, labels

    def compute_loss(self, scores, labels):
        return self.criterion(scores, labels)

# 数据和训练参数
triples = torch.tensor([
    [1, 2, 3],
    [4, 5, 6],
    # 添加更多的三元组
])

dataset = KGDataset(triples)
dataloader = DataLoader(dataset, batch_size=2, shuffle=True)

# 创建模型、优化器
embedding_dim = 50
model = CAGED(embedding_dim)
optimizer = optim.Adam(model.parameters(), lr=0.001)

# 训练循环
for epoch in range(10):
    for pos_triples in dataloader:
        neg_triples = pos_triples[torch.randperm(pos_triples.size(0))]
        
        optimizer.zero_grad()
        scores, labels = model(pos_triples, neg_triples)
        loss = model.compute_loss(scores, labels)
        loss.backward()
        optimizer.step()

    print(f"Epoch {epoch}, Loss: {loss.item()}")


RuntimeError: Expected floating point type for target with class probabilities, got Long

### TransE Demo   
Input：triples = [(0, 0, 1), (2, 1, 3), (4, 2, 5)....] int triple info   
Output：dim=50 tensor

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

class TransE(nn.Module):
    def __init__(self, num_entities, num_relations, embedding_dim, margin=1.0):
        super(TransE, self).__init__()
        self.embedding_dim = embedding_dim
        self.margin = margin
        self.entity_embeddings = nn.Embedding(num_entities, embedding_dim)
        self.relation_embeddings = nn.Embedding(num_relations, embedding_dim)
        self.init_weights()

    def init_weights(self):
        nn.init.xavier_uniform_(self.entity_embeddings.weight.data)
        nn.init.xavier_uniform_(self.relation_embeddings.weight.data)

    def forward(self, h, r, t, h_neg, t_neg):
        h_emb = self.entity_embeddings(h)
        r_emb = self.relation_embeddings(r)
        t_emb = self.entity_embeddings(t)
        h_neg_emb = self.entity_embeddings(h_neg)
        t_neg_emb = self.entity_embeddings(t_neg)

        pos_distance = torch.norm(h_emb + r_emb - t_emb, p=2, dim=1)
        neg_distance = torch.norm(h_neg_emb + r_emb - t_neg_emb, p=2, dim=1)

        return pos_distance, neg_distance

    def loss(self, pos_distance, neg_distance):
        return torch.sum(torch.relu(self.margin + pos_distance - neg_distance))

def generate_negative_samples(triples, num_entities):
    neg_triples = []
    for (h, r, t) in triples:
        if np.random.rand() > 0.5:
            h_neg = np.random.randint(num_entities)
            while h_neg == h:
                h_neg = np.random.randint(num_entities)
            neg_triples.append((h_neg, r, t))
        else:
            t_neg = np.random.randint(num_entities)
            while t_neg == t:
                t_neg = np.random.randint(num_entities)
            neg_triples.append((h, r, t_neg))
    return neg_triples

def train_model(triples, num_entities, num_relations, embedding_dim=50, margin=1.0, learning_rate=0.01, num_epochs=1000, batch_size=128):
    model = TransE(num_entities, num_relations, embedding_dim, margin)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    triple_indices = np.arange(len(triples))
    for epoch in range(num_epochs):
        np.random.shuffle(triple_indices)
        total_loss = 0

        for i in range(0, len(triples), batch_size):
            batch_indices = triple_indices[i:i + batch_size]
            batch_triples = [triples[idx] for idx in batch_indices]
            batch_neg_triples = generate_negative_samples(batch_triples, num_entities)

            h = torch.tensor([triple[0] for triple in batch_triples], dtype=torch.long)
            r = torch.tensor([triple[1] for triple in batch_triples], dtype=torch.long)
            t = torch.tensor([triple[2] for triple in batch_triples], dtype=torch.long)
            h_neg = torch.tensor([triple[0] for triple in batch_neg_triples], dtype=torch.long)
            r_neg = torch.tensor([triple[1] for triple in batch_neg_triples], dtype=torch.long)
            t_neg = torch.tensor([triple[2] for triple in batch_neg_triples], dtype=torch.long)

            pos_distance, neg_distance = model(h, r, t, h_neg, t_neg)
            loss = model.loss(pos_distance, neg_distance)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        print(f'Epoch {epoch + 1}/{num_epochs}, Loss: {total_loss/len(triples)}')

    return model

# Example usage
num_entities = 1000  # Example number of entities
num_relations = 100  # Example number of relations
triples = [(0, 0, 1), (2, 1, 3), (4, 2, 5)]  # Example triples

trained_model = train_model(triples, num_entities, num_relations)

Epoch 1/1000, Loss: 0.9760046005249023
Epoch 2/1000, Loss: 0.9848268826802572
Epoch 3/1000, Loss: 0.8913046518961588
Epoch 4/1000, Loss: 0.9329829216003418
Epoch 5/1000, Loss: 0.8146908283233643
Epoch 6/1000, Loss: 0.7400547663370768
Epoch 7/1000, Loss: 0.618378480275472
Epoch 8/1000, Loss: 0.6130247116088867
Epoch 9/1000, Loss: 0.6061376333236694
Epoch 10/1000, Loss: 0.5742376645406088
Epoch 11/1000, Loss: 0.5345075925191244
Epoch 12/1000, Loss: 0.427237590154012
Epoch 13/1000, Loss: 0.38709986209869385
Epoch 14/1000, Loss: 0.450755516688029
Epoch 15/1000, Loss: 0.34282676378885907
Epoch 16/1000, Loss: 0.2632403572400411
Epoch 17/1000, Loss: 0.3119153579076131
Epoch 18/1000, Loss: 0.32717124621073407
Epoch 19/1000, Loss: 0.2514731089274089
Epoch 20/1000, Loss: 0.3188825845718384
Epoch 21/1000, Loss: 0.21658907334009805
Epoch 22/1000, Loss: 0.14694501956303915
Epoch 23/1000, Loss: 0.12546896934509277
Epoch 24/1000, Loss: 0.12484625975290935
Epoch 25/1000, Loss: 0.07940481106440227
Epoc