# Holographic Embeddings of Knowledge Graph
This notebook trains a Holographic Embeddings (HolE) model, which generates embeddings of the entities and relations in a knowledge graph using a pairwise ranking loss.

In [2]:
from __future__ import print_function
from base import Experiment

import logging
from skge import sample
from skge import HolE
from skge.base import StochasticTrainer, PairwiseStochasticTrainer
from base import cosin_distance
import numpy as np
from base import FilteredRankingEval,ranking_scores,ranking_hits,pre_process_data
from skge.util import ccorr
import pickle
import timeit
import matplotlib.pyplot as plt
from Holographic_Embedding import HolE_Embedding, HolEEval
import numpy as np

# Named logger for this experiment.
log = logging.getLogger('EX-KG')

# Configure the root logger so that records from DEBUG level upwards are emitted.
logging.basicConfig(level=logging.DEBUG)


# Loading Data

Given the paths to the dataset files, the data is read and converted into the following attributes of `example`:

- train: data for training

- valid: data for validation

- test: data for testing

- entities: dictionary storing the mapping between entity ids and entities

- relations: dictionary storing the mapping between relation ids and relations

In [3]:
# Paths to the three WN18 files passed to the model as its input.
fileName = ['data/WN18/train.txt', 'data/WN18/valid.txt', 'data/WN18/test.txt']

example = HolE_Embedding()
example.input_file = fileName

# `prefix` is the first path up to and including its last '/', or the empty
# string when that path contains no '/'.
train_dir, slash = fileName[0].rpartition('/')[:2]
prefix = train_dir + slash
example.output_file = prefix + 'output.txt'

# read_dataset() returns five values, stored back on `example` in this order.
(example.train,
 example.valid,
 example.test,
 example.entities,
 example.relations) = example.read_dataset()

# Settings

evaluator: Holographic Embedding evaluation model; uses a pairwise ranking loss to update with associative memory

margin: Margin for the loss function. A positive/negative pair is a violation when the difference between their scores is less than the margin: 0.2

lr: Learning rate, 0.1

me: The maximum number of epochs: 5

ne: The number of negative instances the models tries to generate for each positive instance: 1

nb: The number of batches: 50

test_all: Evaluate Test set after each x epochs: 5

afs: Activation function: sigmoid

ncomp: Dimension of each embedding vector: 300

# Building Evaluator, Model, and Trainer

# Training the Model and Generating Embeddings

The epoch, execution time and number of violations will be printed during training

Violation: the number of negative instances that are too close to their positive instances, i.e. the difference between their activation function outputs is less than the defined margin.

In [4]:
# Run training (progress is logged per epoch) and unpack the three returned
# values; `model` and `ev_test` are reused by the saving and evaluation cells.
embedding_vector, model, ev_test = example.learn_embeddings()

INFO:EX-KG:Fitting model HolE with trainer PairwiseStochasticTrainer
INFO:EX-KG:[  1] time = 28s, violations = 216820
INFO:EX-KG:[  2] time = 16s, violations = 84439
INFO:EX-KG:[  3] time = 12s, violations = 40180
INFO:EX-KG:[  4] time = 11s, violations = 25724
INFO:EX-KG:[  5] time = 10s, violations = 18777
DEBUG:EX-KG:FMRR valid = 1006.673123, best = 1000000.000000
INFO:EX-KG:[  5] time = 373s, violations = 18777
DEBUG:EX-KG:FMRR valid = 1006.673123, best = 1006.673123


# Saving Embeddings

In [5]:
# Write every entity embedding, then every relation embedding, to the output
# file as "<label> <vector>" records. The `with` block guarantees the file is
# closed even if a lookup or write fails part-way through.
# NOTE(review): str() of a NumPy array wraps long vectors across several lines,
# so a single record may span multiple lines in the file.
with open(example.output_file, "w") as embeddings_out:
    for labels, vectors in ((example.entities, model.E),
                            (example.relations, model.R)):
        for i in range(len(vectors)):
            embeddings_out.write(
                str(labels[i]) + " " + str(np.array(vectors[i])) + '\n')

# Quick look at the first entity label and its embedding.
print(example.entities[0])
print(example.model.E[0])

0
[-0.04100875 -0.00700551 -0.0111604   0.06573371  0.00241421 -0.0560459
 -0.06505495 -0.12279784  0.08778003  0.10722331 -0.0041038  -0.09441672
 -0.02858488 -0.00754178  0.17159011  0.15690541  0.08834875  0.05108197
 -0.00639358  0.08770879  0.01697101 -0.08326885 -0.02126091 -0.03209237
  0.00974679  0.02557857  0.0696245  -0.01819768 -0.02253713 -0.1412152
  0.0350013   0.00309914  0.05776609 -0.05487634 -0.13118874 -0.11380952
 -0.034026   -0.09087033  0.01439053  0.05169777  0.05134972 -0.0322808
 -0.01361544  0.00993792  0.1291119   0.07129353 -0.0016742  -0.05006641
 -0.07787545 -0.06045609  0.01686473 -0.04393624  0.01610761 -0.08905419
 -0.05843089 -0.10803511  0.08691467  0.03381053  0.01983609 -0.05061037
 -0.06664968  0.05154633  0.07047081 -0.05055067  0.0241964  -0.04280385
 -0.03082456  0.0741201   0.10864919  0.05913226  0.03204919  0.0306235
  0.05307181  0.0136236   0.06016472 -0.0441248  -0.15008133 -0.09731735
  0.08134456 -0.03235199 -0.01046687 -0.05770456 -0.0

# Saving Model

In [6]:
# Pass the directory derived from the training-file path (`prefix`, computed in
# the data-loading cell) instead of repeating the literal, so the save location
# follows the dataset path if `fileName` is changed.
example.save_model(prefix)

# Evaluating Embeddings

The evaluation metrics here are:

- overall cosine similarity between the predicted entities' and the target entities' embeddings generated by the model

- overall MR (Mean Rank) of all predictions

- overall Hits@1, Hits@3 and Hits@10 of all predictions

In [7]:
# Evaluate the trained model; `result` is indexed below by metric name.
result = example.evaluate(fileName, model, ev_test)

# Each metric is printed followed by a blank line.
print("cosine similarity: " + str(result['cosine similarity']) + '\n')
print("MR: " + str(result['MR']) + '\n')
for cutoff in ('1', '3', '10'):
    print("Hits@" + cutoff + ": " + str(result['Hits'][cutoff]) + '\n')

cosine similarity: 0.3257791901231025

MR: 997.8860899237254

Hits@1: 53.09112806101967

Hits@3: 61.15014050582096

Hits@10: 68.47651545564031

