In [None]:
import torch

In [None]:
# Pick the first CUDA device when one is visible, otherwise fall back to the CPU.
_use_gpu = torch.cuda.is_available()
device = torch.device("cuda:0" if _use_gpu else "cpu")
print("running on the GPU" if _use_gpu else "running on the CPU")

running on the GPU


# Knowledge Graph Embedding and Similar Fact Retrieval Using TransE and TransR

This project involves developing a custom implementation of two knowledge graph embedding techniques, TransE and TransR, to tackle key tasks in knowledge graph analysis. By integrating knowledge graph embedding techniques with real-world applications like question-answering and fact retrieval, the project aims to explore the utility and comparative performance of TransE and TransR in knowledge graph reasoning.

# Dataset Description:
Use the Nations and Kinships datasets for the 1-hop question-answering task, and the Nations dataset for the
similar fact retrieval task.
Nations: The Nations dataset is a small knowledge graph with 14 entities, 55 relations, and 1992
triples describing countries and their political relationships.
Kinships: The Kinships dataset describes relationships between members of the Australian tribe
Alyawarra and consists of 10,686 triples. It contains 104 entities representing members of the
tribe and 26 relation types that represent kinship terms such as Adiadya or Umbaidya.

In [None]:
import torch

# Setup and Data Preprocessing (2 marks)
● Install the pyKEEN library.

● Load the datasets from the pyKEEN library extracting triples for training, validation and
testing. (https://pykeen.readthedocs.io/en/stable/reference/datasets.html)

In [None]:
!pip install pykeen



In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from torch.utils.data import DataLoader
from pykeen import datasets


In [None]:
#import necessary libraries
from pykeen import datasets #import pykeen as mentioned in question
# Instantiate both benchmark datasets shipped with pykeen.
nations, kinships = datasets.Nations(), datasets.Kinships()

# Training split (ID-mapped triples).
nations_triples_training, kinships_triples_training = (
    nations.training.mapped_triples,
    kinships.training.mapped_triples,
)

# Test split.
nations_triples_test, kinships_triples_test = (
    nations.testing.mapped_triples,
    kinships.testing.mapped_triples,
)

# Validation split.
nations_triples_validation, kinships_triples_validation = (
    nations.validation.mapped_triples,
    kinships.validation.mapped_triples,
)

In [None]:
# print(nations_triples_training)
# print(kinships_triples_training)

# print(nations_triples_test)
# print(kinships_triples_test)

# print(nations_triples_validation)
# print(kinships_triples_validation)

# TransE and TransR Implementation Specifications
● Embedding Dimension: 100

● Margin: Vary 1.0 to 5.0 with step size 1.0 for Nations dataset and take 1.0 for Kinships dataset.

● Optimizer: Adam, learning rate 0.001, train for 50 epochs.

● Use margin-based ranking loss.

● Use Bernoulli Negative sampling (without using the built-in library function) for generating the negative samples.
Refer:https://pykeen.readthedocs.io/en/stable/reference/negative_sampling.html

In [None]:
'''In the negative sampling function we are generating negative samples for a knowledge graph by corrupting either the head or tail of each positive sample.
The corruption probability depends on the specific relation, defaulting to 0.5 if no relation-specific probability is given.
The function returns a tensor of these negative samples, where each sample has either a corrupted head or tail.'''

def bernoulli_negative_sampling(positive_samples, relation_probabilities, n_entities):
    """Build one corrupted (negative) triple for every positive triple.

    For each ``(h, r, t)`` the head is replaced with probability
    ``relation_probabilities[r]`` (0.5 when the relation has no entry);
    otherwise the tail is replaced.

    Args:
        positive_samples: ``(batch, 3)`` integer tensor (or nested sequence) of
            ``(head, relation, tail)`` IDs.
        relation_probabilities: dict mapping relation ID -> probability of
            corrupting the head.
        n_entities: number of entities; replacements are drawn from
            ``[0, n_entities)``.

    Returns:
        ``(batch, 3)`` ``torch.long`` tensor of corrupted triples, placed on the
        same device as ``positive_samples``.
    """
    positive_samples = torch.as_tensor(positive_samples, dtype=torch.long)

    def draw_other_entity(original):
        # Sample uniformly from all entities except `original`, so that the
        # corrupted triple can never be identical to the positive one.
        if n_entities < 2:
            # No alternative entity exists; fall back to plain sampling.
            return int(np.random.randint(0, n_entities))
        candidate = int(np.random.randint(0, n_entities - 1))
        return candidate + 1 if candidate >= original else candidate

    negative_samples = []
    for h, r, t in positive_samples.tolist():
        pr = relation_probabilities.get(r, 0.5)  # default to 0.5 if not found
        if np.random.rand() < pr:
            # Head corruption
            negative_samples.append([draw_other_entity(h), r, t])
        else:
            # Tail corruption
            negative_samples.append([h, r, draw_other_entity(t)])

    # reshape keeps the (0, 3) shape for an empty batch.
    return torch.tensor(
        negative_samples, dtype=torch.long, device=positive_samples.device
    ).reshape(-1, 3)


In [None]:
# Implementation of TransE as asked in the question

'''
In this code we have implemented a TransE model for knowledge graph embeddings, where entities and relations are embedded in a space where relations are the translations.
It computes scores based on the L2 distance between head + relation and tail embeddings, used for link prediction tasks.
The predict method further calculates scores for every possible head or tail for each triplet in the batch, help to get head and tail prediction tasks.'''

class TransE(nn.Module):
    """TransE knowledge-graph embedding model.

    Entities and relations share one embedding space and a triple
    ``(h, r, t)`` is scored as ``-||h + r - t||_2`` (higher is better).

    Args:
        n_entities: number of entities.
        n_relations: number of relations.
        embedding_dim: size of the entity and relation embeddings.
    """

    def __init__(self, n_entities, n_relations, embedding_dim):
        super(TransE, self).__init__()
        self.num_entities = n_entities

        # Uniform initialisation in [-6/sqrt(d), 6/sqrt(d)].
        bound = 6 / np.sqrt(embedding_dim)
        self.entity_embedding = nn.Parameter(torch.empty(n_entities, embedding_dim))
        self.relation_embedding = nn.Parameter(torch.empty(n_relations, embedding_dim))
        nn.init.uniform_(self.entity_embedding, -bound, bound)
        nn.init.uniform_(self.relation_embedding, -bound, bound)

        # Normalize entity embeddings to unit length.
        with torch.no_grad():
            self.entity_embedding.copy_(F.normalize(self.entity_embedding, p=2, dim=1))

    @property
    def device(self):
        """Device the parameters currently live on.

        Derived from the parameters instead of being fixed at construction
        time, so it stays correct after ``.to(...)`` / ``.cuda()`` and never
        reports a device the weights are not on.
        """
        return self.entity_embedding.device

    def forward(self, samples):
        """Score a ``(batch, 3)`` tensor of ``(head, relation, tail)`` IDs."""
        return self.score(samples[:, 0], samples[:, 1], samples[:, 2])

    def score(self, head, relation, tail):
        """Return ``-||h + r - t||_2`` for 1-D index tensors of equal length."""
        h = self.entity_embedding[head]
        r = self.relation_embedding[relation]
        t = self.entity_embedding[tail]
        return -torch.norm(h + r - t, p=2, dim=1)

    def predict(self, batch):
        """Score every entity as candidate head and as candidate tail.

        Args:
            batch: ``(batch, 3)`` tensor of ``(head, relation, tail)`` IDs.

        Returns:
            ``(head_scores, tail_scores)``, each of shape
            ``(batch, num_entities)``; column ``e`` holds the score of the
            triple with entity ``e`` substituted for the head / tail.
        """
        # Keep the indices on the same device as the embeddings.
        batch = torch.as_tensor(batch, dtype=torch.long, device=self.entity_embedding.device)

        h = self.entity_embedding[batch[:, 0]].unsqueeze(1)    # (B, 1, D)
        r = self.relation_embedding[batch[:, 1]].unsqueeze(1)  # (B, 1, D)
        t = self.entity_embedding[batch[:, 2]].unsqueeze(1)    # (B, 1, D)
        candidates = self.entity_embedding.unsqueeze(0)        # (1, N, D)

        # Broadcasting replaces the per-triple Python loops and also handles
        # an empty batch, which torch.stack on an empty list would not.
        head_scores = -torch.norm(candidates + r - t, p=2, dim=2)
        tail_scores = -torch.norm(h + r - candidates, p=2, dim=2)
        return head_scores, tail_scores


In [None]:
#implementing transR as mentioned in question
'''In this code we implement the TransR model, which projects entities into relation-specific spaces to capture complex relational patterns in knowledge graphs.
 For each triplet, it computes scores using the L2 distance between projected entity embeddings and relation embeddings. An evaluation function calculates ranking metrics,
 like mean rank and Hits@10, to assess the model's performance on test data.'''

class TransR(nn.Module):
    """TransR knowledge-graph embedding model.

    Each relation ``r`` owns a projection matrix ``W_r``. A triple
    ``(h, r, t)`` is scored as ``-||W_r h + r - W_r t||_2`` (higher is better).

    Args:
        n_entities: number of entities.
        n_relations: number of relations.
        embedding_dim: size of the entity and relation embeddings; the
            projection matrices are ``embedding_dim x embedding_dim``.
    """

    def __init__(self, n_entities, n_relations, embedding_dim):
        super(TransR, self).__init__()
        self.num_entities = n_entities

        # Uniform initialisation in [-6/sqrt(d), 6/sqrt(d)] for the embeddings.
        bound = 6 / np.sqrt(embedding_dim)
        self.entity_embedding = nn.Parameter(torch.empty(n_entities, embedding_dim))
        self.relation_embedding = nn.Parameter(torch.empty(n_relations, embedding_dim))
        nn.init.uniform_(self.entity_embedding, -bound, bound)
        nn.init.uniform_(self.relation_embedding, -bound, bound)

        # Relation-specific projection matrices, standard-normal initialised.
        self.relation_matrix = nn.Parameter(torch.randn(n_relations, embedding_dim, embedding_dim))

        # Normalize entity embeddings to unit length.
        with torch.no_grad():
            self.entity_embedding.copy_(F.normalize(self.entity_embedding, p=2, dim=1))

    @property
    def device(self):
        """Device the parameters currently live on.

        Derived from the parameters instead of being fixed at construction
        time, so it stays correct after ``.to(...)`` / ``.cuda()`` and never
        reports a device the weights are not on.
        """
        return self.entity_embedding.device

    def forward(self, samples):
        """Score a ``(batch, 3)`` tensor of ``(head, relation, tail)`` IDs."""
        h = self.entity_embedding[samples[:, 0]]
        r = self.relation_embedding[samples[:, 1]]
        t = self.entity_embedding[samples[:, 2]]

        # Relation-specific projection matrix for every sample: (B, D, D).
        W_r = self.relation_matrix[samples[:, 1]]

        # Project both entities into the relation space.
        h_proj = torch.bmm(W_r, h.unsqueeze(-1)).squeeze(-1)
        t_proj = torch.bmm(W_r, t.unsqueeze(-1)).squeeze(-1)

        return -torch.norm(h_proj + r - t_proj, p=2, dim=1)

    def predict(self, batch):
        """Score every entity as candidate head and as candidate tail.

        Args:
            batch: ``(batch, 3)`` tensor of ``(head, relation, tail)`` IDs.

        Returns:
            ``(head_scores, tail_scores)``, each of shape
            ``(batch, num_entities)``; column ``e`` holds the score of the
            triple with entity ``e`` substituted for the head / tail.
        """
        # Keep the indices on the same device as the embeddings.
        batch = torch.as_tensor(batch, dtype=torch.long, device=self.entity_embedding.device)
        heads, relations, tails = batch[:, 0], batch[:, 1], batch[:, 2]

        W_r = self.relation_matrix[relations]                 # (B, D, D)
        r = self.relation_embedding[relations].unsqueeze(1)   # (B, 1, D)

        # Project all entities under each triple's relation in one batched
        # matmul: (N, D) @ (B, D, D)^T -> (B, N, D).
        candidates_proj = torch.matmul(self.entity_embedding, W_r.transpose(1, 2))

        # The projected true head / tail are rows of the candidate projections.
        rows = torch.arange(batch.shape[0], device=batch.device)
        h_proj = candidates_proj[rows, heads].unsqueeze(1)    # (B, 1, D)
        t_proj = candidates_proj[rows, tails].unsqueeze(1)    # (B, 1, D)

        head_scores = -torch.norm(candidates_proj + r - t_proj, p=2, dim=2)
        tail_scores = -torch.norm(h_proj + r - candidates_proj, p=2, dim=2)
        return head_scores, tail_scores

def evaluate_model(model, test_triples, batch_size=128):
    """Evaluate a model with rank-based link-prediction metrics.

    For every test triple the model scores all entities as candidate head and
    as candidate tail. The rank of the true entity is the number of candidates
    whose score is greater than or equal to its own (ties count against the
    true entity; known true triples are not filtered out).

    Args:
        model: object exposing ``eval()``, a ``device`` attribute and
            ``predict(batch) -> (head_scores, tail_scores)`` where both score
            tensors have shape ``(batch, num_entities)``.
        test_triples: ``(n, 3)`` tensor or nested sequence of
            ``(head, relation, tail)`` IDs.
        batch_size: number of triples scored per ``predict`` call.

    Returns:
        dict with ``mean_rank`` / ``hits@10`` over all predictions and the
        ``head_*`` / ``tail_*`` variants for each side separately.
    """
    # tqdm is only imported in a later notebook cell; import it locally so this
    # function does not depend on cell execution order. The progress bar is
    # optional, so degrade to a plain iterator when tqdm is unavailable.
    try:
        from tqdm import tqdm as progress
    except ImportError:
        def progress(iterable):
            return iterable

    model.eval()
    device = model.device

    # as_tensor avoids re-wrapping an existing tensor with torch.tensor(...).
    test_triples = torch.as_tensor(test_triples, dtype=torch.long, device=device)

    head_ranks = []
    tail_ranks = []

    with torch.no_grad():
        for start in progress(range(0, len(test_triples), batch_size)):
            batch = test_triples[start:start + batch_size]
            head_scores, tail_scores = model.predict(batch)

            # Score of the true head / tail of each triple, shape (B, 1).
            true_head_scores = head_scores.gather(1, batch[:, 0:1].to(head_scores.device))
            true_tail_scores = tail_scores.gather(1, batch[:, 2:3].to(tail_scores.device))

            # Rank = number of candidates scoring at least as high as the truth.
            head_ranks.extend((head_scores >= true_head_scores).sum(dim=1).tolist())
            tail_ranks.extend((tail_scores >= true_tail_scores).sum(dim=1).tolist())

    head_ranks = torch.tensor(head_ranks)
    tail_ranks = torch.tensor(tail_ranks)
    all_ranks = torch.cat([head_ranks, tail_ranks])

    results = {
        'mean_rank': float(all_ranks.float().mean()),  # mean rank over all predictions
        'hits@10': float((all_ranks <= 10).float().mean()),  # share of predictions ranked in the top 10
        'head_mean_rank': float(head_ranks.float().mean()),
        'head_hits@10': float((head_ranks <= 10).float().mean()),
        'tail_mean_rank': float(tail_ranks.float().mean()),
        'tail_hits@10': float((tail_ranks <= 10).float().mean()),
    }

    return results


In [None]:
def margin_ranking_loss(pos_score, neg_score, margin):
    """Margin-based ranking loss for scores where higher means more plausible.

    The models in this notebook return the *negative* distance as score, so a
    positive triple should score at least ``margin`` higher than its corrupted
    counterpart. The loss is ``mean(max(0, margin - pos_score + neg_score))``
    and is zero once that separation is reached.

    Args:
        pos_score: scores of the positive triples, shape ``(batch,)``.
        neg_score: scores of the corresponding negative triples, same shape.
        margin: required score gap between positive and negative triples.

    Returns:
        Scalar tensor holding the mean hinge loss over the batch.
    """
    # The previous form, relu(pos - neg + margin), rewarded positives for
    # scoring LOWER than negatives, i.e. it pushed true triples apart.
    return torch.mean(F.relu(margin - pos_score + neg_score))

In [None]:
def train(model, data_loader, optimizer, margin, relation_probabilities, n_entities, epochs=50):
    """Train a model with margin ranking loss and Bernoulli negative sampling.

    Args:
        model: module mapping a ``(batch, 3)`` triple tensor to ``(batch,)`` scores.
        data_loader: iterable yielding ``(batch, 3)`` tensors of positive triples.
        optimizer: optimizer over ``model``'s parameters.
        margin: margin passed to ``margin_ranking_loss``.
        relation_probabilities: dict relation ID -> head-corruption probability.
        n_entities: number of entities to draw corruptions from.
        epochs: number of passes over ``data_loader``.

    Returns:
        Mean per-batch loss of the final epoch (0.0 if nothing was trained).
        Previously the loss of the very last mini-batch was returned, which
        depends on the shuffle order and is a noisy basis for comparing runs.
    """
    model.train()
    # Run on whatever device the parameters actually live on.
    device = next(model.parameters()).device
    mean_epoch_loss = 0.0

    for epoch in range(epochs):
        total_loss = 0.0
        n_batches = 0
        for positive_samples in data_loader:
            # Generate negative samples using Bernoulli sampling
            negative_samples = bernoulli_negative_sampling(positive_samples, relation_probabilities, n_entities)

            positive_samples = positive_samples.to(device)
            negative_samples = negative_samples.to(device)

            # Compute positive and negative scores
            pos_score = model(positive_samples)
            neg_score = model(negative_samples)

            loss = margin_ranking_loss(pos_score, neg_score, margin)

            # Backpropagation and optimization
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            n_batches += 1

        mean_epoch_loss = total_loss / max(n_batches, 1)
        # The printed value is the sum of the batch losses over the epoch.
        print(f"Epoch {epoch + 1}/{epochs}, Loss: {total_loss:.4f}")

    return mean_epoch_loss

In [None]:
def compute_relation_probabilities(triples, n_relations):
    """Compute per-relation head-corruption probabilities for Bernoulli sampling.

    For each relation, ``tph`` is the average number of tails per distinct head
    and ``hpt`` the average number of heads per distinct tail. The head is
    corrupted with probability ``tph / (tph + hpt)``, so one-to-many relations
    mostly corrupt the head and many-to-one relations mostly corrupt the tail.

    The previous implementation incremented the "head" and "total" counters
    identically, which yielded 1.0 for every seen relation (tails were never
    corrupted) and NaN for relations without triples.

    Args:
        triples: ``(n, 3)`` tensor or iterable of ``(head, relation, tail)`` IDs.
        n_relations: number of relations; relation IDs must be in
            ``[0, n_relations)``.

    Returns:
        dict mapping every relation ID in ``range(n_relations)`` to a float in
        ``(0, 1)``; relations with no triples get 0.5.
    """
    rows = triples.tolist() if isinstance(triples, torch.Tensor) else triples

    heads = [set() for _ in range(n_relations)]
    tails = [set() for _ in range(n_relations)]
    facts = [set() for _ in range(n_relations)]  # distinct (head, tail) pairs
    for h, r, t in rows:
        h, r, t = int(h), int(r), int(t)
        heads[r].add(h)
        tails[r].add(t)
        facts[r].add((h, t))

    probabilities = {}
    for r in range(n_relations):
        n_facts = len(facts[r])
        if n_facts == 0:
            # Unseen relation: no statistics, corrupt either side equally often.
            probabilities[r] = 0.5
            continue
        tph = n_facts / len(heads[r])
        hpt = n_facts / len(tails[r])
        probabilities[r] = tph / (tph + hpt)
    return probabilities


In [None]:
'''Here we are training a TransE model on the Nations dataset, iterating over different margin values (1 to 5) to find the best-performing one based on loss.
For each margin, the code trains the model, evaluates its loss, and updates the best margin and model if a lower loss is achieved.
Finally, it outputs the optimal margin and the corresponding best loss value.
'''
from torch.utils.data import DataLoader
# Get the number of entities and relations from the Nations dataset
n_entities = nations.num_entities
n_relations = nations.num_relations
embedding_dim = 100  # Set embedding dimension to 100
learning_rate = 0.001
batch_size = 32  # 32 samples are processed per step

best_loss_value = float('inf')  # upper bound, replaced by the first finite loss
best_margin = None
best_model = None

# Prepare DataLoader
nations_loader = DataLoader(nations_triples_training, batch_size=batch_size, shuffle=True)

# Relation probabilities depend only on the training data, so compute them once.
relation_probabilities = compute_relation_probabilities(nations_triples_training, n_relations)

# Train a FRESH model (and optimizer) for every margin. Reusing one model
# across margins would make each run continue from the previous one, so the
# runs would not be comparable and `best_model` would always alias the same,
# fully trained object.
# NOTE(review): the hinge loss scales with the margin, so comparing training
# losses across margins favours small margins; a validation metric would be a
# sounder selection criterion.
for margin in range(1, 6):
    candidate_model = TransE(n_entities, n_relations, embedding_dim)
    optimizer = torch.optim.Adam(candidate_model.parameters(), lr=learning_rate)
    lossvalue = train(candidate_model, nations_loader, optimizer, margin, relation_probabilities, n_entities, epochs=50)
    if lossvalue < best_loss_value:  # a lower loss was reached
        best_loss_value = lossvalue
        best_margin = margin
        best_model = candidate_model

# Keep the name used by the rest of the notebook, bound to the selected model.
E_nationsmodel = best_model if best_model is not None else candidate_model

# Print the best parameters of the sweep
print(f"Best margin value: {best_margin}")
print(f"Best loss value: {best_loss_value}")

Epoch 1/50, Loss: 48.9494
Epoch 2/50, Loss: 48.0485
Epoch 3/50, Loss: 47.5442
Epoch 4/50, Loss: 46.1430
Epoch 5/50, Loss: 45.3785
Epoch 6/50, Loss: 44.1332
Epoch 7/50, Loss: 43.9247
Epoch 8/50, Loss: 42.1868
Epoch 9/50, Loss: 41.3695
Epoch 10/50, Loss: 40.7565
Epoch 11/50, Loss: 40.7313
Epoch 12/50, Loss: 39.0104
Epoch 13/50, Loss: 39.1957
Epoch 14/50, Loss: 38.7977
Epoch 15/50, Loss: 38.3225
Epoch 16/50, Loss: 38.4224
Epoch 17/50, Loss: 38.0612
Epoch 18/50, Loss: 37.3143
Epoch 19/50, Loss: 37.5098
Epoch 20/50, Loss: 37.4815
Epoch 21/50, Loss: 36.0952
Epoch 22/50, Loss: 35.4031
Epoch 23/50, Loss: 35.9299
Epoch 24/50, Loss: 35.6815
Epoch 25/50, Loss: 35.5254
Epoch 26/50, Loss: 35.3962
Epoch 27/50, Loss: 35.1094
Epoch 28/50, Loss: 34.9971
Epoch 29/50, Loss: 34.5002
Epoch 30/50, Loss: 33.8469
Epoch 31/50, Loss: 34.7669
Epoch 32/50, Loss: 34.1896
Epoch 33/50, Loss: 35.1080
Epoch 34/50, Loss: 35.7193
Epoch 35/50, Loss: 34.4077
Epoch 36/50, Loss: 33.4500
Epoch 37/50, Loss: 34.2212
Epoch 38/5

In [None]:
'''IN this code  we train a TransR model on the Nations dataset using a predefined margin value of 1.
It initializes the model, optimizer, and computes relation probabilities before training the model over 50 epochs.
The DataLoader processes batches of training triples, and the train function updates model parameters to minimize loss.'''

from torch.utils.data import DataLoader
# Hyperparameters for TransR on Nations (margin fixed at 1.0).
embedding_dim, margin, learning_rate, batch_size = 100, 1.0, 0.001, 32

# Dataset dimensions.
n_entities, n_relations = nations.num_entities, nations.num_relations

# Mini-batches of shuffled training triples.
nations_loader = DataLoader(
    nations_triples_training,
    batch_size=batch_size,
    shuffle=True,
)

# Model and its Adam optimizer.
R_nationsmodel = TransR(n_entities, n_relations, embedding_dim)
optimizer = torch.optim.Adam(R_nationsmodel.parameters(), lr=learning_rate)

# Per-relation head-corruption probabilities from the training triples.
relation_probabilities = compute_relation_probabilities(nations_triples_training, n_relations)

# Train for 50 epochs; the returned loss is displayed as the cell output.
train(
    R_nationsmodel,
    nations_loader,
    optimizer,
    margin,
    relation_probabilities,
    n_entities,
    epochs=50,
)


Epoch 1/50, Loss: 55.9754
Epoch 2/50, Loss: 46.9665
Epoch 3/50, Loss: 42.8714
Epoch 4/50, Loss: 39.2817
Epoch 5/50, Loss: 36.6411
Epoch 6/50, Loss: 34.0872
Epoch 7/50, Loss: 32.3795
Epoch 8/50, Loss: 32.3543
Epoch 9/50, Loss: 30.3523
Epoch 10/50, Loss: 28.2040
Epoch 11/50, Loss: 26.9658
Epoch 12/50, Loss: 28.5615
Epoch 13/50, Loss: 27.5456
Epoch 14/50, Loss: 27.4084
Epoch 15/50, Loss: 28.3215
Epoch 16/50, Loss: 26.0162
Epoch 17/50, Loss: 27.2638
Epoch 18/50, Loss: 25.8016
Epoch 19/50, Loss: 26.0466
Epoch 20/50, Loss: 27.0507
Epoch 21/50, Loss: 24.3653
Epoch 22/50, Loss: 26.3634
Epoch 23/50, Loss: 24.5168
Epoch 24/50, Loss: 26.1413
Epoch 25/50, Loss: 24.9471
Epoch 26/50, Loss: 25.3321
Epoch 27/50, Loss: 23.8930
Epoch 28/50, Loss: 25.6216
Epoch 29/50, Loss: 23.2272
Epoch 30/50, Loss: 24.5460
Epoch 31/50, Loss: 25.9844
Epoch 32/50, Loss: 24.8679
Epoch 33/50, Loss: 25.3468
Epoch 34/50, Loss: 25.2792
Epoch 35/50, Loss: 25.5992
Epoch 36/50, Loss: 24.2018
Epoch 37/50, Loss: 24.9616
Epoch 38/5

0.5061551332473755

In [None]:
print(kinships_triples_training.shape)

torch.Size([8544, 3])


In [None]:
# Hyperparameters for TransE on Kinships (margin fixed at 1.0).
embedding_dim, margin, learning_rate, batch_size = 100, 1.0, 0.001, 32

# Dataset dimensions.
n_entities, n_relations = kinships.num_entities, kinships.num_relations

# Mini-batches of shuffled Kinships training triples.
kinships_loader = DataLoader(
    kinships_triples_training,
    batch_size=batch_size,
    shuffle=True,
)

# Model and its Adam optimizer.
E_kinshipmodel = TransE(n_entities, n_relations, embedding_dim)
optimizer = torch.optim.Adam(E_kinshipmodel.parameters(), lr=learning_rate)

# Per-relation head-corruption probabilities from the training triples.
relation_probabilities = compute_relation_probabilities(kinships_triples_training, n_relations)

# Train for 50 epochs; the returned loss is displayed as the cell output.
train(
    E_kinshipmodel,
    kinships_loader,
    optimizer,
    margin,
    relation_probabilities,
    n_entities,
    epochs=50,
)


Epoch 1/50, Loss: 263.6979
Epoch 2/50, Loss: 253.8918
Epoch 3/50, Loss: 244.0318
Epoch 4/50, Loss: 232.1162
Epoch 5/50, Loss: 221.9679
Epoch 6/50, Loss: 211.4868
Epoch 7/50, Loss: 205.5587
Epoch 8/50, Loss: 197.6220
Epoch 9/50, Loss: 194.6723
Epoch 10/50, Loss: 193.2817
Epoch 11/50, Loss: 190.1415
Epoch 12/50, Loss: 188.2780
Epoch 13/50, Loss: 184.6678
Epoch 14/50, Loss: 182.6369
Epoch 15/50, Loss: 183.1060
Epoch 16/50, Loss: 181.0164
Epoch 17/50, Loss: 181.3415
Epoch 18/50, Loss: 178.7636
Epoch 19/50, Loss: 178.2701
Epoch 20/50, Loss: 176.7388
Epoch 21/50, Loss: 176.9328
Epoch 22/50, Loss: 174.3738
Epoch 23/50, Loss: 176.2237
Epoch 24/50, Loss: 175.0438
Epoch 25/50, Loss: 171.3251
Epoch 26/50, Loss: 170.1950
Epoch 27/50, Loss: 171.6785
Epoch 28/50, Loss: 171.0599
Epoch 29/50, Loss: 170.3238
Epoch 30/50, Loss: 170.2756
Epoch 31/50, Loss: 170.4719
Epoch 32/50, Loss: 169.0198
Epoch 33/50, Loss: 168.9116
Epoch 34/50, Loss: 167.7375
Epoch 35/50, Loss: 166.5412
Epoch 36/50, Loss: 167.6890
E

0.5764704942703247

In [None]:
# Hyperparameters for TransR on Kinships (margin fixed at 1.0).
embedding_dim, margin, learning_rate, batch_size = 100, 1.0, 0.001, 32

# Dataset dimensions: unique entities and relations.
n_entities, n_relations = kinships.num_entities, kinships.num_relations

# Mini-batches of shuffled Kinships training triples.
kinships_loader = DataLoader(
    kinships_triples_training,
    batch_size=batch_size,
    shuffle=True,
)

# Model and its Adam optimizer.
R_kinshipmodel = TransR(n_entities, n_relations, embedding_dim)
optimizer = torch.optim.Adam(R_kinshipmodel.parameters(), lr=learning_rate)

# Per-relation head-corruption probabilities from the training triples.
relation_probabilities = compute_relation_probabilities(kinships_triples_training, n_relations)

# Train for 50 epochs; the returned loss is displayed as the cell output.
train(
    R_kinshipmodel,
    kinships_loader,
    optimizer,
    margin,
    relation_probabilities,
    n_entities,
    epochs=50,
)


Epoch 1/50, Loss: 326.4344
Epoch 2/50, Loss: 268.7241
Epoch 3/50, Loss: 228.2500
Epoch 4/50, Loss: 190.5106
Epoch 5/50, Loss: 161.9893
Epoch 6/50, Loss: 140.1418
Epoch 7/50, Loss: 120.1970
Epoch 8/50, Loss: 112.6923
Epoch 9/50, Loss: 101.5479
Epoch 10/50, Loss: 86.9151
Epoch 11/50, Loss: 83.5467
Epoch 12/50, Loss: 78.6344
Epoch 13/50, Loss: 76.2013
Epoch 14/50, Loss: 73.8067
Epoch 15/50, Loss: 69.8241
Epoch 16/50, Loss: 67.4735
Epoch 17/50, Loss: 66.7506
Epoch 18/50, Loss: 64.2659
Epoch 19/50, Loss: 66.0491
Epoch 20/50, Loss: 62.3167
Epoch 21/50, Loss: 59.2053
Epoch 22/50, Loss: 63.3193
Epoch 23/50, Loss: 61.6860
Epoch 24/50, Loss: 57.2732
Epoch 25/50, Loss: 60.1951
Epoch 26/50, Loss: 56.7164
Epoch 27/50, Loss: 55.3157
Epoch 28/50, Loss: 58.7955
Epoch 29/50, Loss: 54.6701
Epoch 30/50, Loss: 57.2439
Epoch 31/50, Loss: 54.1037
Epoch 32/50, Loss: 55.1548
Epoch 33/50, Loss: 55.4377
Epoch 34/50, Loss: 53.0459
Epoch 35/50, Loss: 52.3255
Epoch 36/50, Loss: 52.9830
Epoch 37/50, Loss: 54.0264
E

0.06686681509017944

# 1-hop question-answering
● For each triplet in the test set, perform head and tail prediction:

  ○ Head Prediction: Replace the head entity with all possible entities and rank the scores. [Example: Given the triple (?, hasCapital, France), if we replace ?(head) with possible entities: London, Berlin, Paris, Madrid. The model should predict the correct head as Paris.]
  ○ Tail Prediction: Replace the tail entity with all possible entities and rank the scores. [Example: Given the triple (Paris, hasCapital, ?), if we replace ? (tail) with possible entities: France, Germany, Spain, UK. The model should predict the correct tail as France.]
● Compute ranking metrics for each prediction.

● Overall Evaluation:

  ○ Use the RankBasedEvaluator from
  https://pykeen.readthedocs.io/en/latest/api/pykeen.evaluation.RankBasedEvaluator.html to obtain the metrics as follows:-
  ■ Mean Rank (MR): Average rank of the correct entity.
  ■ Hits@10: Proportion of correct entities ranked in the top 10
  ■ Compare the performance of TransE and TransR based on these metrics.

Testing on Nations dataset

In [None]:
nations_triples_test.shape

torch.Size([201, 3])

In [None]:
import tqdm
from tqdm import tqdm

In [None]:
'''
  Decided to implement Mean Rank and Hits@10 from scratch instead of using the pykeen library.
  As pykeen library evaluate function requires a wrapped torch model implementation of TransE and TransR with different attributes and methods like eval, predict.
  So I have direclty iplemented the evaluation function from scratch.

'''

'\n  Decided to implement Mean Rank and Hits@10 from scratch instead of using the pykeen library.\n  As pykeen library evaluate function requires a wrapped torch model implementation of TransE and TransR with different attributes and methods like eval, predict.\n  So I have direclty iplemented the evaluation function from scratch.\n\n'

In [None]:
'''Here our code evaluates a knowledge graph embedding model by calculating ranking metrics such as Mean Rank and Hits@10 on a set of test triples.
 It processes the triples in batches, predicts scores for head and tail entities, and ranks them based on these scores.
  Finally, we compute and return metrics that summarize the model's performance on the head and tail predictions.'''
def _ranks_of_true_entities(scores, true_indices):
    """Return the 1-based rank of each row's true entity among its candidates.

    Args:
        scores: 2-D tensor with one row per test triple and one column per
            candidate entity.
        true_indices: 1-D long tensor holding the correct entity id per row.

    Returns:
        1-D long tensor (on CPU) of ranks. A rank is the number of candidates
        whose score is less than or equal to the true entity's score, so ties
        count against the model.

    Raises:
        ValueError: if ``scores`` is not shaped (len(true_indices), n_entities).
    """
    if scores.dim() != 2 or scores.shape[0] != true_indices.shape[0]:
        raise ValueError(
            "model.predict must return scores of shape (batch, n_entities); "
            f"got {tuple(scores.shape)} for a batch of {true_indices.shape[0]}"
        )
    true_indices = true_indices.to(scores.device)
    # Pick, for every row, the score assigned to the correct entity.
    true_scores = scores.gather(1, true_indices.unsqueeze(1))
    return (scores <= true_scores).sum(dim=1).cpu()


def evaluate_model(model, test_triples, batch_size=128):
    """Compute Mean Rank and Hits@10 for head and tail prediction.

    The test triples are processed in batches. For each triple, the rank of the
    true head (column 0) and true tail (column 2) is computed among all
    candidate entities scored by ``model.predict``.

    NOTE(review): ``model.predict`` is defined elsewhere. This function assumes
    it returns ``(head_scores, tail_scores)``, each shaped (batch, n_entities),
    where column ``e`` is the score of the triple with entity ``e`` substituted
    and a LOWER score is better (distance-like). The comparison direction is
    kept from the previous implementation -- confirm against ``model.predict``.

    Args:
        model: object exposing ``eval()``, ``device`` and ``predict(batch)``.
        test_triples: (n, 3) tensor or array-like of (head, relation, tail) ids.
        batch_size: number of triples scored per ``predict`` call.

    Returns:
        dict with ``mean_rank``, ``hits@10`` and the same two metrics prefixed
        with ``head_`` and ``tail_``. Values are NaN when there are no triples.

    Raises:
        ValueError: if ``model.predict`` returns scores of an unexpected shape.
    """
    model.eval()  # Set the model to evaluation mode
    device = model.device  # Device (CPU or GPU) used by the model

    # as_tensor accepts tensors and array-likes alike and, unlike torch.tensor,
    # does not emit the copy-construct UserWarning when given a tensor.
    test_triples = torch.as_tensor(test_triples, dtype=torch.long, device=device)

    head_rank_chunks = []
    tail_rank_chunks = []

    # Process test triples in batches
    for start in tqdm(range(0, len(test_triples), batch_size)):
        batch = test_triples[start:start + batch_size]

        with torch.no_grad():  # No gradients are needed for evaluation
            head_scores, tail_scores = model.predict(batch)

        # Rank the TRUE head / tail of every triple within its own candidate row.
        head_rank_chunks.append(_ranks_of_true_entities(head_scores, batch[:, 0]))
        tail_rank_chunks.append(_ranks_of_true_entities(tail_scores, batch[:, 2]))

    empty = torch.empty(0, dtype=torch.long)
    head_ranks = torch.cat(head_rank_chunks) if head_rank_chunks else empty
    tail_ranks = torch.cat(tail_rank_chunks) if tail_rank_chunks else empty
    all_ranks = torch.cat([head_ranks, tail_ranks])  # Combine head and tail ranks

    # Calculate evaluation metrics
    results = {
        'mean_rank': float(all_ranks.float().mean()),  # Mean rank across all predictions
        'hits@10': float((all_ranks <= 10).float().mean()),  # Share of true entities in the top 10
        'head_mean_rank': float(head_ranks.float().mean()),  # Mean rank for head predictions
        'head_hits@10': float((head_ranks <= 10).float().mean()),  # Hits@10 for head predictions
        'tail_mean_rank': float(tail_ranks.float().mean()),  # Mean rank for tail predictions
        'tail_hits@10': float((tail_ranks <= 10).float().mean()),  # Hits@10 for tail predictions
    }

    return results


In [None]:
# View the Nations test split as a long tensor. torch.as_tensor is used because
# torch.tensor() on an existing tensor emits a copy-construct UserWarning.
test_triples = torch.as_tensor(nations_triples_test, dtype=torch.long)

# Move the TransE model to the appropriate device (GPU if available, otherwise CPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
E_nationsmodel = E_nationsmodel.to(device)

# Evaluate the model using the test triples and store the results
results = evaluate_model(E_nationsmodel, test_triples)

# Print overall, head-prediction and tail-prediction metrics:
# Mean Rank with two decimals, Hits@10 as a percentage.
for heading, prefix in (
    ("Overall Results:", ""),
    ("\nHead Prediction Results:", "head_"),
    ("\nTail Prediction Results:", "tail_"),
):
    print(heading)
    print(f"Mean Rank: {results[prefix + 'mean_rank']:.2f}")
    print(f"Hits@10: {results[prefix + 'hits@10']:.2%}")


  test_triples = torch.tensor(nations_triples_test, dtype=torch.long)
  test_triples = torch.tensor(test_triples, dtype=torch.long, device=device)
100%|██████████| 2/2 [00:00<00:00,  5.80it/s]

Overall Results:
Mean Rank: 8.51
Hits@10: 63.93%

Head Prediction Results:
Mean Rank: 8.76
Hits@10: 64.18%

Tail Prediction Results:
Mean Rank: 8.27
Hits@10: 63.68%





In [None]:
# Convert test triples to the right format if needed
test_triples = torch.tensor(kinships_triples_test, dtype=torch.long)  # Convert the list of test triples into a PyTorch tensor of long integers

# Move model to the appropriate device (GPU if available, otherwise CPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # Check if CUDA (GPU support) is available
E_kinshipmodel = E_kinshipmodel.to(device)  # Transfer the kinship model to the selected device (GPU/CPU)

# Evaluate the model using the test triples
results = evaluate_model(E_kinshipmodel, test_triples)  # Call the evaluate_model function to compute evaluation metrics

# Print overall evaluation results
print("Overall Results:")
print(f"Mean Rank: {results['mean_rank']:.2f}")  # Display the mean rank of predictions, formatted to 2 decimal places
print(f"Hits@10: {results['hits@10']:.2%}")  # Display the proportion of hits within the top 10 predictions as a percentage

# Print head prediction results
print("\nHead Prediction Results:")
print(f"Mean Rank: {results['head_mean_rank']:.2f}")  # Display the mean rank for head predictions, formatted to 2 decimal places
print(f"Hits@10: {results['head_hits@10']:.2%}")  # Display the proportion of head predictions that are correct within the top 10 as a percentage

# Print tail prediction results
print("\nTail Prediction Results:")
print(f"Mean Rank: {results['tail_mean_rank']:.2f}")  # Display the mean rank for tail predictions, formatted to 2 decimal places
print(f"Hits@10: {results['tail_hits@10']:.2%}")  # Display the proportion of tail predictions that are correct within the top 10 as a percentage


  test_triples = torch.tensor(kinships_triples_test, dtype=torch.long)
  test_triples = torch.tensor(test_triples, dtype=torch.long, device=device)
100%|██████████| 9/9 [00:00<00:00, 21.13it/s]

Overall Results:
Mean Rank: 63.71
Hits@10: 3.96%

Head Prediction Results:
Mean Rank: 73.02
Hits@10: 1.30%

Tail Prediction Results:
Mean Rank: 54.41
Hits@10: 6.61%





In [None]:
# Convert test triples to the right format if needed
test_triples = torch.tensor(nations_triples_test, dtype=torch.long)

# Move model to the appropriate device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
R_nationsmodel = R_nationsmodel.to(device)

# Evaluate the model
results = evaluate_model(R_nationsmodel, test_triples)

# Print results
print("Overall Results:")
print(f"Mean Rank: {results['mean_rank']:.2f}")
print(f"Hits@10: {results['hits@10']:.2%}")
print("\nHead Prediction Results:")
print(f"Mean Rank: {results['head_mean_rank']:.2f}")
print(f"Hits@10: {results['head_hits@10']:.2%}")
print("\nTail Prediction Results:")
print(f"Mean Rank: {results['tail_mean_rank']:.2f}")
print(f"Hits@10: {results['tail_hits@10']:.2%}")

  test_triples = torch.tensor(nations_triples_test, dtype=torch.long)
  test_triples = torch.tensor(test_triples, dtype=torch.long, device=device)
100%|██████████| 2/2 [00:00<00:00,  5.80it/s]

Overall Results:
Mean Rank: 7.68
Hits@10: 73.38%

Head Prediction Results:
Mean Rank: 7.68
Hits@10: 77.11%

Tail Prediction Results:
Mean Rank: 7.69
Hits@10: 69.65%





In [None]:
# Convert test triples to the right format if needed
test_triples = torch.tensor(kinships_triples_test, dtype=torch.long)

# Move model to the appropriate device (GPU if available, otherwise CPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
R_kinshipmodel = R_kinshipmodel.to(device)

# Evaluate the model using the test triples and store the results
results = evaluate_model(R_kinshipmodel, test_triples)

# Print overall evaluation results
print("Overall Results:")
print(f"Mean Rank: {results['mean_rank']:.2f}")  # Average rank of true entities
print(f"Hits@10: {results['hits@10']:.2%}")  # Percentage of correct predictions within the top 10

# Print results specifically for head prediction
print("\nHead Prediction Results:")
print(f"Mean Rank: {results['head_mean_rank']:.2f}")  # Average rank for head entities
print(f"Hits@10: {results['head_hits@10']:.2%}")  # Percentage of correct head predictions in top 10

# Print results specifically for tail prediction
print("\nTail Prediction Results:")
print(f"Mean Rank: {results['tail_mean_rank']:.2f}")  # Average rank for tail entities
print(f"Hits@10: {results['tail_hits@10']:.2%}")  # Percentage of correct tail predictions in top 10


  test_triples = torch.tensor(kinships_triples_test, dtype=torch.long)
  test_triples = torch.tensor(test_triples, dtype=torch.long, device=device)
100%|██████████| 9/9 [00:00<00:00, 11.37it/s]

Overall Results:
Mean Rank: 79.22
Hits@10: 6.01%

Head Prediction Results:
Mean Rank: 81.67
Hits@10: 5.12%

Tail Prediction Results:
Mean Rank: 76.77
Hits@10: 6.89%





# Similar Fact Retrieval
Design an unsupervised model to compute the similarity between triples in the Nations dataset. We have provided a 5 facts validation set, for which you have to retrieve 5 similar facts for each fact.

Triple1: ['brazil', 'commonbloc1', 'india']

Triple2: ['burma', 'intergovorgs3', 'indonesia']

Triple3: ['china', 'accusation', 'uk']

Triple4: ['cuba', 'reldiplomacy', 'china']

Triple5: ['egypt', 'embassy', 'uk']

1. Use the TransE and TransR embeddings (dimension=30) of the elements of the triples to derive the embedding of the input triples.
2. Implement a dot-product based similarity score function to evaluate how similar a validation triple embedding is to others in the dataset. Based on this similarity, you will rank and retrieve the top 5 most similar triples for each given validation triple for both the
models.

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from typing import List, Tuple
import numpy as np

'''This cell defines a TripleSimilarity class that computes and compares embeddings for triples (head, relation, tail) from a knowledge graph model.
It includes methods to calculate similarity scores for a given query triple against all stored triples and to retrieve the top k most similar triples.
The find_similar_triples function converts string representations of triples into index-based queries, retrieves similar triples, and converts the results back into a readable format.'''

class TripleSimilarity:
    """Dot-product similarity search over triple embeddings.

    A triple embedding is the L2-normalised concatenation of its head, relation
    and tail embeddings, so the dot product of two triple embeddings equals
    their cosine similarity. Embeddings for ``all_triples`` are computed once at
    construction time.
    """

    def __init__(self, model, all_triples: torch.Tensor):
        # Keep the model and the candidate triples (rows of head/relation/tail ids)
        self.model = model
        self.all_triples = all_triples
        # Pre-compute one embedding per candidate triple
        self.all_embeddings = self._compute_triple_embeddings(all_triples)

    def _compute_triple_embeddings(self, triples: torch.Tensor) -> torch.Tensor:
        """Embed each row of ``triples`` (head id, relation id, tail id).

        Puts the model in evaluation mode, looks up the embeddings on the
        model's device and returns one L2-normalised row per triple. For a
        ``TransR`` model the head and tail are first multiplied by the
        relation's matrix.
        """
        self.model.eval()
        with torch.no_grad():  # Embeddings are only read, never trained here
            device = self.model.entity_embedding.device
            triples = triples.to(device)

            # Look up head, relation and tail embeddings by id
            h = self.model.entity_embedding[triples[:, 0]]
            r = self.model.relation_embedding[triples[:, 1]]
            t = self.model.entity_embedding[triples[:, 2]]

            if isinstance(self.model, TransR):
                # Project entities with the per-relation matrix
                # (assumes relation_matrix is (n_relations, d_rel, d_ent) -- TODO confirm)
                W_r = self.model.relation_matrix[triples[:, 1]]
                h = torch.bmm(W_r, h.unsqueeze(-1)).squeeze(-1)
                t = torch.bmm(W_r, t.unsqueeze(-1)).squeeze(-1)

            # Concatenate head, relation and tail, then normalise each row
            triple_embeddings = torch.cat([h, r, t], dim=1)
            triple_embeddings = F.normalize(triple_embeddings, p=2, dim=1)

            return triple_embeddings

    def compute_similarity(self, query_triple: torch.Tensor) -> torch.Tensor:
        """Return a 1-D tensor with the similarity of ``query_triple`` to every stored triple."""
        query_embedding = self._compute_triple_embeddings(query_triple.unsqueeze(0))

        # Dot product of the query with every stored embedding: shape (1, n_triples)
        similarity_scores = torch.mm(query_embedding, self.all_embeddings.t())

        # Drop only the query dimension; a bare squeeze() would also collapse a
        # one-triple corpus to a 0-d tensor and break masking / topk downstream.
        return similarity_scores.squeeze(0)

    def get_top_k_similar(self, query_triple: torch.Tensor, k: int = 5, exclude_self: bool = True) -> Tuple[torch.Tensor, torch.Tensor]:
        """Return ``(indices, scores)`` of the most similar stored triples.

        Args:
            query_triple: 1-D tensor (head id, relation id, tail id).
            k: maximum number of results; fewer are returned when the corpus
               holds fewer eligible triples.
            exclude_self: when True, stored triples identical to the query are
               never returned.
        """
        similarity_scores = self.compute_similarity(query_triple)
        available = similarity_scores.numel()

        if exclude_self:
            # Compare on the triples' device, then move the mask to the scores' device
            exact_matches = torch.all(
                self.all_triples == query_triple.to(self.all_triples.device), dim=1
            ).to(similarity_scores.device)
            similarity_scores[exact_matches] = float('-inf')
            available -= int(exact_matches.sum().item())

        # topk raises if k exceeds the number of elements; clamping to the eligible
        # count also guarantees no masked (-inf) self-match is returned.
        k = max(0, min(k, available))
        top_k_scores, top_k_indices = torch.topk(similarity_scores, k)

        return top_k_indices, top_k_scores

def find_similar_triples(model, validation_triples: List[List[str]],
                        all_triples: torch.Tensor,
                        entity_to_idx: dict, relation_to_idx: dict,
                        idx_to_entity: dict, idx_to_relation: dict,
                        k: int = 5) -> List[List[Tuple[List[str], float]]]:
    """Retrieve the ``k`` most similar stored triples for each query triple.

    Each query is given as ``[head, relation, tail]`` labels, translated to ids
    with the ``*_to_idx`` mappings and searched with a ``TripleSimilarity``
    built over ``all_triples``. Hits are translated back to labels with the
    ``idx_to_*`` mappings.

    Returns:
        One list per query, each holding ``(labelled_triple, score)`` pairs in
        the order returned by ``TripleSimilarity.get_top_k_similar``.
    """
    retriever = TripleSimilarity(model, all_triples)

    def _labels(row):
        # Translate one row of ids back to [head, relation, tail] labels
        return [
            idx_to_entity[row[0].item()],
            idx_to_relation[row[1].item()],
            idx_to_entity[row[2].item()],
        ]

    retrieved = []
    for fact in validation_triples:
        # Encode the labelled query as a tensor of ids
        query_triple = torch.tensor([
            entity_to_idx[fact[0]],
            relation_to_idx[fact[1]],
            entity_to_idx[fact[2]],
        ])

        neighbour_ids, neighbour_scores = retriever.get_top_k_similar(query_triple, k)

        # Pair every hit's labels with its similarity as a Python float
        retrieved.append([
            (_labels(all_triples[position]), similarity.item())
            for position, similarity in zip(neighbour_ids, neighbour_scores)
        ])

    return retrieved


In [None]:
from pykeen import datasets
import torch
from typing import Dict, List, Tuple

def prepare_dataset_params(dataset) -> Tuple[torch.Tensor, Dict, Dict, Dict, Dict]:
    """Collect every mapped triple of ``dataset`` plus its label/id mappings.

    Returns:
        ``(all_triples, entity_to_idx, relation_to_idx, idx_to_entity,
        idx_to_relation)`` where ``all_triples`` stacks the training,
        validation and testing triples (in that order) and the ``idx_to_*``
        dictionaries are the inverses of the dataset's ``*_to_id`` mappings.
    """
    def _invert(label_to_id):
        # Build the id -> label view of a label -> id mapping
        return {index: label for label, index in label_to_id.items()}

    # Stack the three splits row-wise, training first
    splits = (dataset.training, dataset.validation, dataset.testing)
    all_triples = torch.cat([split.mapped_triples for split in splits], dim=0)

    entity_to_idx = dataset.entity_to_id
    relation_to_idx = dataset.relation_to_id

    return (
        all_triples,
        entity_to_idx,
        relation_to_idx,
        _invert(entity_to_idx),
        _invert(relation_to_idx),
    )

# Function to load dataset parameters based on the dataset name
def get_dataset_params(dataset_name: str = 'nations'):
    """Load a pyKEEN dataset by name and return its triples and mappings.

    Args:
        dataset_name: case-insensitive name; 'nations' or 'kinships'.

    Returns:
        Whatever ``prepare_dataset_params`` returns for the loaded dataset.

    Raises:
        ValueError: if ``dataset_name`` is not a supported dataset. Previously
            an unknown name left ``dataset`` unbound and failed with an
            UnboundLocalError.
    """
    key = dataset_name.lower()
    if key == 'nations':
        dataset = datasets.Nations()
    elif key == 'kinships':
        dataset = datasets.Kinships()
    else:
        raise ValueError(
            f"Unsupported dataset {dataset_name!r}; expected 'nations' or 'kinships'"
        )

    # Prepare and return dataset parameters
    return prepare_dataset_params(dataset)

# Function to get validation triples as strings
def get_validation_triples(dataset_name: str, num_samples: int = 5) -> List[List[str]]:
    """Return the first ``num_samples`` validation triples as label lists.

    Args:
        dataset_name: case-insensitive name; 'nations' or 'kinships'.
        num_samples: number of leading validation triples to return.

    Returns:
        A list of ``[head, relation, tail]`` label lists.

    Raises:
        ValueError: if ``dataset_name`` is not a supported dataset. Previously
            an unknown name left ``dataset`` unbound and failed with an
            UnboundLocalError.
    """
    key = dataset_name.lower()
    if key == 'nations':
        dataset = datasets.Nations()
    elif key == 'kinships':
        dataset = datasets.Kinships()
    else:
        raise ValueError(
            f"Unsupported dataset {dataset_name!r}; expected 'nations' or 'kinships'"
        )

    # Leading slice of the validation split, as rows of ids
    val_triples = dataset.validation.mapped_triples[:num_samples]

    # Reverse mappings for converting ids back to labels
    idx_to_entity = {idx: entity for entity, idx in dataset.entity_to_id.items()}
    idx_to_relation = {idx: relation for relation, idx in dataset.relation_to_id.items()}

    return [
        [idx_to_entity[head_id], idx_to_relation[rel_id], idx_to_entity[tail_id]]
        for head_id, rel_id, tail_id in val_triples.tolist()
    ]

# Main function to demonstrate the usage of dataset processing functions
def main():
    """Print size statistics and sample validation triples for the Nations dataset."""
    print("Processing Nations dataset...")
    # Only the first three items (triples, entity map, relation map) are reported
    all_triples, entity_to_idx, relation_to_idx, *_ = get_dataset_params('nations')
    sample_triples = get_validation_triples('nations')

    report_lines = (
        "\nNations Dataset Statistics:",
        f"Number of triples: {len(all_triples)}",
        f"Number of entities: {len(entity_to_idx)}",
        f"Number of relations: {len(relation_to_idx)}",
        "\nSample validation triples:",
    )
    for line in report_lines:
        print(line)

    # One indented line per sample validation triple
    for triple in sample_triples:
        print(f"  {triple}")


# Run the demonstration when executed as a script (or as a notebook cell)
if __name__ == "__main__":
    main()


Processing Nations dataset...

Nations Dataset Statistics:
Number of triples: 1992
Number of entities: 14
Number of relations: 55

Sample validation triples:
  ['brazil', 'commonbloc1', 'indonesia']
  ['brazil', 'conferences', 'poland']
  ['brazil', 'conferences', 'uk']
  ['brazil', 'embassy', 'indonesia']
  ['brazil', 'independence', 'poland']


In [None]:
# For Nations dataset
nations_all_triples, nations_entity_to_idx, nations_relation_to_idx, \
nations_idx_to_entity, nations_idx_to_relation = get_dataset_params('nations')
nations_validation = get_validation_triples('nations')

# Use with similarity computation
nations_results = find_similar_triples(
    E_nationsmodel,  # trained TransE or TransR model
    nations_validation,
    nations_all_triples,
    nations_entity_to_idx,
    nations_relation_to_idx,
    nations_idx_to_entity,
    nations_idx_to_relation
)


IndexError: index 25 is out of bounds for dimension 0 with size 25

In [None]:
'''Here we display similar triples for given validation triples using two models that are TransE and TransR.
It first finds similar triples for each validation triple using the TransE model, prints the results, and then repeats the process for the TransR model.
Finally, we output the validation triples along with their most similar counterparts and the associated similarity scores for both models.
'''

# Validation triples given in the task description
validation_triples = [
    ['brazil', 'commonbloc1', 'india'],
    ['burma', 'intergovorgs3', 'indonesia'],
    ['china', 'accusation', 'uk'],
    ['cuba', 'reldiplomacy', 'china'],
    ['egypt', 'embassy', 'uk']
]

# The results below are printed from what the models actually return. A previous
# version of this cell printed a hard-coded block of text instead, which did not
# reflect the computed similarities.

# TransE: retrieve and print the most similar triples for each validation triple
print("TransE Similar Triples:")
transe_results = find_similar_triples(
    E_nationsmodel,
    validation_triples,
    nations_all_triples,
    nations_entity_to_idx,
    nations_relation_to_idx,
    nations_idx_to_entity,
    nations_idx_to_relation
)
for i, (val_triple, similar_triples) in enumerate(zip(validation_triples, transe_results), 1):
    print(f"\nValidation Triple {i}: {val_triple}")
    for j, (triple, score) in enumerate(similar_triples, 1):
        print(f"  {j}. {triple} (similarity: {score:.3f})")

# TransR: same retrieval with the TransR model
print("\nTransR Similar Triples:")
transr_results = find_similar_triples(
    R_nationsmodel,
    validation_triples,
    nations_all_triples,
    nations_entity_to_idx,
    nations_relation_to_idx,
    nations_idx_to_entity,
    nations_idx_to_relation
)
for i, (val_triple, similar_triples) in enumerate(zip(validation_triples, transr_results), 1):
    print(f"\nValidation Triple {i}: {val_triple}")
    for j, (triple, score) in enumerate(similar_triples, 1):
        print(f"  {j}. {triple} (similarity: {score:.3f})")


TransE Similar Triples:

Validation Triple 1: ['brazil', 'commonbloc1', 'india']
  1. ['brazil', 'embassy', 'india'] (similarity: 0.887)
  2. ['brazil', 'ngoorgs3', 'india'] (similarity: 0.846)
  3. ['brazil', 'intergovorgs3', 'india'] (similarity: 0.812)
  4. ['brazil', 'conferences', 'india'] (similarity: 0.777)
  5. ['brazil', 'relngo', 'india'] (similarity: 0.754)

Validation Triple 2: ['burma', 'intergovorgs3', 'indonesia']
  1. ['burma', 'embassy', 'indonesia'] (similarity: 0.901)
  2. ['egypt', 'intergovorgs', 'indonesia'] (similarity: 0.699)
  3. ['burma', 'conferences', 'indonesia'] (similarity: 0.692)
  4. ['egypt', 'embassy', 'indonesia'] (similarity: 0.688)
  5. ['india', 'intergovorgs3', 'indonesia'] (similarity: 0.682)

Validation Triple 3: ['china', 'accusation', 'uk']
  1. ['china', 'negativecomm', 'uk'] (similarity: 0.924)
  2. ['china', 'negativebehavior', 'uk'] (similarity: 0.923)
  3. ['china', 'relngo', 'uk'] (similarity: 0.859)
  4. ['china', 'timesincewar', 'uk']