In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class CompGCNLayer(nn.Module):
    """Single CompGCN-style message-passing layer.

    For every edge the source-node feature is composed with the embedding of
    the edge's relation (see ``comp``). The composed messages are summed per
    destination node, projected by ``w_node``, and added to a self-loop
    projection of the node's own feature plus a bias before the activation.

    Args:
        in_dim: Width of the incoming node features. Relation embeddings are
            composed element-wise with node features, so they are expected to
            have the same width.
        out_dim: Width of the node features produced by the layer.
        num_rels: Number of relation types (stored only; not used in the
            computation).
        act: Callable applied to the final node update.
        comp_op: Composition operator: ``"sub"``, ``"mul"`` or ``"corr"``.
    """

    def __init__(self, in_dim, out_dim, num_rels, act=F.relu, comp_op="sub"):
        super().__init__()

        self.in_dim = in_dim
        self.out_dim = out_dim
        self.num_rels = num_rels
        self.act = act
        self.comp_op = comp_op

        # Projection applied to the aggregated neighbour messages.
        self.w_node = nn.Linear(in_dim, out_dim, bias=False)

        # Projection for relation embeddings (in_dim -> out_dim); it is not
        # used by forward() and is exposed through transform_rel().
        self.w_rel = nn.Linear(in_dim, out_dim, bias=False)

        # Self-loop weight, applied to each node's own feature.
        self.loop_weight = nn.Parameter(torch.empty(in_dim, out_dim))
        nn.init.xavier_uniform_(self.loop_weight)

        # Bias added to every node update.
        self.bias = nn.Parameter(torch.zeros(out_dim))

    def comp(self, h, r):
        """Compose node features ``h`` with relation embeddings ``r``.

        Raises:
            NotImplementedError: If ``self.comp_op`` is not one of
                ``"sub"``, ``"mul"`` or ``"corr"``.
        """
        if self.comp_op == "sub":
            return h - r
        elif self.comp_op == "mul":
            return h * r
        elif self.comp_op == "corr":
            return self.circular_correlation(h, r)
        else:
            raise NotImplementedError("Unknown composition operator")

    def circular_correlation(self, h, r):
        """Circular correlation of ``h`` and ``r`` along the last dimension.

        Computed in the frequency domain as ``ifft(fft(h) * conj(fft(r)))``;
        for real inputs of width ``d`` this equals
        ``out[k] = sum_i r[i] * h[(i + k) % d]``.
        """
        fft_h = torch.fft.fft(h)
        fft_r = torch.fft.fft(r)
        return torch.real(torch.fft.ifft(fft_h * torch.conj(fft_r)))

    def transform_rel(self, rel_embed):
        """Project relation embeddings from ``in_dim`` to ``out_dim`` with ``w_rel``.

        Useful when stacking layers: the next layer composes relations with
        ``out_dim``-wide node features and needs embeddings of that width.
        """
        return self.w_rel(rel_embed)

    def forward(self, x, edge_index, edge_type, rel_embed):
        """Run one round of message passing.

        Args:
            x: Node features, indexed by node id along dim 0, width ``in_dim``.
            edge_index: Tensor that unpacks into ``(src, dst)`` node-index
                tensors, i.e. shape ``(2, num_edges)``.
            edge_type: Relation index of every edge, used to index ``rel_embed``.
            rel_embed: Relation embedding table, one row per relation.

        Returns:
            Updated node features of shape ``(num_nodes, out_dim)``.
        """
        num_nodes = x.size(0)

        # Edge sources and targets.
        src, dst = edge_index

        # Relation embedding for each edge: [num_edges, dim].
        rel = rel_embed[edge_type]

        # Compose the source node with the relation: [num_edges, in_dim].
        comp_features = self.comp(x[src], rel)

        # Sum the messages arriving at each destination node. The buffer is
        # allocated from comp_features so it shares its dtype and device;
        # a default float32 buffer makes index_add fail for float64/half input.
        msg = comp_features.new_zeros((num_nodes, self.in_dim))
        msg = msg.index_add(0, dst, comp_features)

        # Self-loop contribution.
        loop_msg = torch.matmul(x, self.loop_weight)

        # Final node update.
        out = self.w_node(msg) + loop_msg + self.bias
        return self.act(out)


class CompGCNModel(nn.Module):
    """Two stacked ``CompGCNLayer`` blocks producing final node embeddings.

    Args:
        in_dim: Width of the input node features and relation embeddings.
        hidden_dim: Width of the node features between the two layers.
        out_dim: Width of the returned node embeddings.
        num_rels: Number of relation types, forwarded to both layers.
        comp_op: Composition operator forwarded to both layers.
    """

    def __init__(self, in_dim, hidden_dim, out_dim, num_rels, comp_op="sub"):
        super().__init__()
        self.layer1 = CompGCNLayer(in_dim, hidden_dim, num_rels, act=F.relu, comp_op=comp_op)
        # The second layer is linear (identity activation) so the output
        # embeddings are not clipped.
        self.layer2 = CompGCNLayer(hidden_dim, out_dim, num_rels, act=lambda x: x, comp_op=comp_op)

    def forward(self, x, edge_index, edge_type, rel_embed):
        """Encode the graph and return one ``out_dim``-wide embedding per node.

        Args:
            x: Input node features of width ``in_dim``.
            edge_index: Source/destination node indices, passed to both layers.
            edge_type: Relation index of every edge, passed to both layers.
            rel_embed: Relation embedding table of width ``in_dim``.
        """
        hidden = self.layer1(x, edge_index, edge_type, rel_embed)

        # Layer 2 composes relations with hidden_dim-wide node features, so the
        # in_dim-wide relation table must first be projected to hidden_dim.
        # Passing the raw table fails with a shape error when in_dim != hidden_dim.
        # layer1.w_rel is the in_dim -> hidden_dim projection the layer already owns.
        rel_hidden = self.layer1.w_rel(rel_embed)

        return self.layer2(hidden, edge_index, edge_type, rel_hidden)  # final node embeddings


Chuẩn bị data: cặp (job, candidate, label)

In [2]:
import torch
def create_train_pairs(edges_df, job_node_ids, candidate_node_ids, relation_has_exp, seed=None):
    """Build (job, candidate) training pairs with binary labels.

    Positive pairs are the edges of ``edges_df`` whose ``relation_id`` equals
    ``relation_has_exp``; the job id is read from ``tail_node_id`` and the
    candidate id from ``head_node_id``. For every positive pair one negative
    pair is drawn for the same job by picking a random candidate that has no
    positive edge to that job.

    Args:
        edges_df: DataFrame with ``head_node_id``, ``tail_node_id`` and
            ``relation_id`` columns.
        job_node_ids: Unused; kept so existing callers keep working.
        candidate_node_ids: Sequence of candidate node ids to sample negatives from.
        relation_has_exp: Relation id that marks a positive (job, candidate) edge.
        seed: Optional seed for reproducible negatives. When ``None`` the
            global ``random`` module state is used.

    Returns:
        ``(pairs, labels)``: an int64 tensor of shape ``(num_pairs, 2)`` holding
        ``(job_id, candidate_id)`` rows (positives first, then negatives) and a
        float tensor of matching 1.0 / 0.0 labels.
    """
    import random

    rng = random if seed is None else random.Random(seed)

    is_positive_edge = edges_df['relation_id'] == relation_has_exp
    pos_pairs = [
        (int(job_id), int(cand_id))
        for job_id, cand_id in edges_df.loc[is_positive_edge, ['tail_node_id', 'head_node_id']].values
    ]

    # job id -> candidates already linked to it. A set lookup replaces a full
    # DataFrame scan per draw, and uses the same (tail=job, head=candidate)
    # orientation as the positives; the previous check had the columns swapped
    # and therefore never rejected a true positive.
    linked_candidates = {}
    for job_id, cand_id in pos_pairs:
        linked_candidates.setdefault(job_id, set()).add(cand_id)

    candidates = [int(cand_id) for cand_id in candidate_node_ids]
    candidate_pool = set(candidates)

    # Draw one negative per positive: a candidate with no positive edge to the job.
    neg_pairs = []
    for job_id, _ in pos_pairs:
        blocked = linked_candidates[job_id]
        if candidate_pool <= blocked:
            # Every candidate is a positive for this job (or there are no
            # candidates): no negative exists, and rejection sampling would
            # never terminate, so the job is skipped.
            continue
        while True:
            neg_cand = rng.choice(candidates)
            if neg_cand not in blocked:
                neg_pairs.append((job_id, neg_cand))
                break

    rows = pos_pairs + neg_pairs
    # Explicit dtype and shape keep the result usable for indexing even when
    # there are no pairs at all (shape (0, 2) instead of a 1-D float tensor).
    all_pairs = torch.tensor(rows, dtype=torch.long).reshape(len(rows), 2)
    all_labels = torch.cat([
        torch.ones(len(pos_pairs), dtype=torch.float),
        torch.zeros(len(neg_pairs), dtype=torch.float),
    ], dim=0)

    return all_pairs, all_labels


Loss

In [3]:
import torch.nn.functional as F

def cosine_loss(embeddings, pairs, labels, temperature=1.0):
    """Binary cross-entropy on the cosine similarity of paired embeddings.

    Args:
        embeddings: Node embedding matrix, indexed by node id along dim 0.
        pairs: Integer tensor of shape ``(num_pairs, 2)``; column 0 and column 1
            hold the node ids of the two embeddings to compare.
        labels: One target per pair (1 for a positive pair, 0 for a negative).
            Converted to the dtype and device of the logits.
        temperature: Positive scale; the cosine similarity is divided by it
            before the sigmoid. The default ``1.0`` uses the raw similarity.
            Because cosine similarity lies in [-1, 1], raw logits keep the
            loss from approaching zero; a value below 1 widens that range.

    Returns:
        Scalar mean binary cross-entropy loss.

    Raises:
        ValueError: If ``temperature`` is not positive.
    """
    if temperature <= 0:
        raise ValueError("temperature must be positive")

    job_emb = embeddings[pairs[:, 0]]
    cand_emb = embeddings[pairs[:, 1]]

    logits = F.cosine_similarity(job_emb, cand_emb) / temperature
    # Align the targets with the logits so integer labels, or labels with a
    # different float dtype or device, do not make the loss call fail.
    targets = labels.to(device=logits.device, dtype=logits.dtype)
    return F.binary_cross_entropy_with_logits(logits, targets)


Training loop đơn giản

In [None]:
# Build the encoder and optimizer. in_dim=768 must match the width of `x`
# (and, presumably, of relation_embed_tensor) — TODO confirm against the cell
# that creates them.
model = CompGCNModel(in_dim=768, hidden_dim=256, out_dim=128,
                     num_rels=relation_embed_tensor.shape[0])
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

for epoch in range(1):
    model.train()
    optimizer.zero_grad()

    # Full-graph forward pass: one embedding per node.
    node_embeddings = model(x, edge_index, edge_type, relation_embed_tensor)

    # NOTE(review): create_train_pairs takes the id of the relation that marks
    # a positive (job, candidate) edge as its fourth argument; the call used to
    # omit it and raised TypeError. `relation_has_exp` must be defined in an
    # earlier cell, next to edges_df.
    # Pairs are rebuilt inside the loop, so negatives are re-drawn every epoch.
    pairs, labels = create_train_pairs(edges_df, job_node_ids, candidate_node_ids,
                                       relation_has_exp)

    loss = cosine_loss(node_embeddings, pairs, labels)
    loss.backward()
    optimizer.step()

    print(f"Epoch {epoch} | Loss: {loss.item():.4f}")
