In [1]:
import config
import models
import tensorflow as tf
import numpy as np
import os


# Run TransE to create the initial embeddings

In [2]:
"""
Method:

Run the normal transe example (example_train_transe.py)
Write the embeddings as a file that can be read
use embeddings to initialize embedding layer in TransE_freeze.py

Append random embeddings for new entities and relations
set config freeze_train_embeddings = true
figure out how to update only the embeddings for a certain set of indices
figure out how to make sure that we only see examples using new items to speed convergence
compare new+old embeddings
"""
# Select GPU 7 by default, but keep any CUDA_VISIBLE_DEVICES the kernel was
# launched with: a plain assignment silently overrode the caller's choice.
os.environ.setdefault('CUDA_VISIBLE_DEVICES', '7')

# Configuration for the baseline TransE run. The frozen run further down
# repeats these values, so any change here has to be made there as well.
con = config.Config()
# Input training files from benchmarks/FB15K/ folder.
con.set_in_path("./benchmarks/FB15K/")
# True: input test files from the same folder.
con.set_test_link_prediction(True)
con.set_test_triple_classification(True)
con.set_work_threads(8)
# 10 training rounds; the training log recorded for this run lists epochs 0-9.
con.set_train_times(10)
con.set_nbatches(20)
# presumably the learning rate and the loss margin - TODO confirm against Config
con.set_alpha(0.001)
con.set_margin(1.0)
con.set_bern(0)
# presumably the embedding width - TODO confirm against Config
con.set_dimension(100)
# presumably negative-sampling rates for entities / relations - TODO confirm
con.set_ent_neg_rate(1)
con.set_rel_neg_rate(0)
con.set_opt_method("SGD")

In [3]:
# Output locations for the baseline run, followed by initialization.
# Models will be exported via tf.Saver() automatically.
# NOTE(review): the meaning of the second argument (0) is not visible in this
# notebook - presumably an export interval; confirm against Config.
con.set_export_files("./res/model.vec.tf", 0)
# Model parameters will be exported to json files automatically. This JSON
# path is the one the frozen run passes to its embedding initializers and the
# comparison cells re-open as the "old" embeddings.
con.set_out_files("./res/embedding.vec.json")
# Initialize experimental settings.
con.init()

In [4]:

# Set the knowledge embedding model: the baseline run uses models.TransE.
# The output recorded for this cell consists only of TensorFlow deprecation
# notices (contrib sunset, keep_dims, tf.cast, global_variables_initializer).
con.set_model(models.TransE)



For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
keep_dims is deprecated, use keepdims instead
Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Use `tf.global_variables_initializer` instead.


In [5]:
# Train the baseline model. The recorded log has one loss line per epoch for
# epochs 0-9 (train_times was set to 10), and the loss falls at every epoch.
# NOTE(review): the logged "time" values are ~0 s, so that field does not look
# like an epoch duration - presumably a logging issue in the project code.
con.run()

Epoch: 0, loss: 305279.8212890625, time: 0.0
Epoch: 1, loss: 129026.60986328125, time: 9.5367431640625e-07
Epoch: 2, loss: 88025.84887695312, time: 0.0
Epoch: 3, loss: 70747.74853515625, time: 0.0
Epoch: 4, loss: 62002.155517578125, time: 0.0
Epoch: 5, loss: 54666.03271484375, time: 0.0
Epoch: 6, loss: 51151.710205078125, time: 9.5367431640625e-07
Epoch: 7, loss: 48109.78564453125, time: 0.0
Epoch: 8, loss: 45266.587890625, time: 9.5367431640625e-07
Epoch: 9, loss: 43888.639404296875, time: 1.1920928955078125e-06


In [6]:
# Spot-check the trained baseline on one example triple.
# predict_head_entity predicts the top k head entities given a tail entity id
# and a relation id; the other two predict_* calls are presumably the
# symmetric variants (head + relation -> tails, head + tail -> relations).
head_id, tail_id = 151, 152
rel_id, other_rel_id = 9, 8
top_k = 5

con.predict_head_entity(tail_id, rel_id, top_k)
con.predict_tail_entity(head_id, rel_id, top_k)
con.predict_relation(head_id, tail_id, top_k)

# predict_triple takes its ids in (head, tail, relation) order; the same
# entity pair is checked against two different relations.
con.predict_triple(head_id, tail_id, rel_id)
con.predict_triple(head_id, tail_id, other_rel_id)

[ 155 2420 1719 3012 4719]
[6189 1821 3460 2917 9528]
[375 518  98 859 974]
triple (151,152,9) is correct
triple (151,152,8) is wrong


# Run TransE_freeze using the embeddings produced in the step above

In [7]:
# Select GPU 7 by default, but keep any CUDA_VISIBLE_DEVICES that is already
# set: a plain assignment silently overrode the caller's choice.
os.environ.setdefault('CUDA_VISIBLE_DEVICES', '7')

# Configuration for the frozen run. Everything up to set_opt_method repeats
# the baseline TransE configuration value for value; only the last three
# calls are specific to this run.
con = config.Config()
# Input training files from benchmarks/FB15K/ folder.
con.set_in_path("./benchmarks/FB15K/")
# True: input test files from the same folder.
con.set_test_link_prediction(True)
con.set_test_triple_classification(True)
con.set_work_threads(8)
# 10 training rounds; the training log recorded for this run lists epochs 0-9.
con.set_train_times(10)
con.set_nbatches(20)
# presumably the learning rate and the loss margin - TODO confirm against Config
con.set_alpha(0.001)
con.set_margin(1.0)
con.set_bern(0)
# presumably the embedding width - TODO confirm against Config
con.set_dimension(100)
# presumably negative-sampling rates for entities / relations - TODO confirm
con.set_ent_neg_rate(1)
con.set_rel_neg_rate(0)
con.set_opt_method("SGD")

# Frozen-run specifics: turn on embedding freezing and initialize both
# embedding tables from the JSON written by the baseline run.
con.set_freeze_train_embeddings(True)
# Both initializers point at the same file; it holds separate
# "ent_embeddings" and "rel_embeddings" entries, so presumably each
# initializer reads its own entry - TODO confirm against Config.
con.set_ent_embedding_initializer("./res/embedding.vec.json")
con.set_rel_embedding_initializer("./res/embedding.vec.json")

In [8]:
# Output locations for the frozen run; the "_new" file names differ from the
# baseline paths, so the baseline artifacts in ./res/ are not overwritten.
# Models will be exported via tf.Saver() automatically.
con.set_export_files("./res/model_new.vec.tf", 0)
# Model parameters will be exported to json files automatically. This JSON is
# what the comparison cells re-open as the "new" embeddings.
con.set_out_files("./res/embedding_new.vec.json")
# Initialize experimental settings.
con.init()
# Set the knowledge embedding model: the frozen variant, models.TransE_freeze.
# NOTE(review): the recorded output shows a Constant initializer repr and a
# "LOOK RIGHT HERE!!!!!" banner; no line in this cell prints them, so they
# presumably come from debug prints inside the project code - remove there.
con.set_model(models.TransE_freeze)

<tensorflow.python.ops.init_ops.Constant object at 0x125b995f8>
LOOK RIGHT HERE!!!!!


In [9]:
# Train the frozen model.
# NOTE(review): unlike the baseline run, the recorded loss does not trend
# down - it stays between roughly 41.4k and 43.2k over all 10 epochs. That
# would be consistent with most parameters being held fixed, but confirm it is
# the intended effect of freezing rather than nothing being trained at all.
con.run()

Epoch: 0, loss: 41839.50158691406, time: 1.1920928955078125e-06
Epoch: 1, loss: 43186.323974609375, time: 0.0
Epoch: 2, loss: 42300.547119140625, time: 1.9073486328125e-06
Epoch: 3, loss: 41839.842529296875, time: 9.5367431640625e-07
Epoch: 4, loss: 42122.31640625, time: 9.5367431640625e-07
Epoch: 5, loss: 42531.62451171875, time: 1.1920928955078125e-06
Epoch: 6, loss: 43029.798828125, time: 9.5367431640625e-07
Epoch: 7, loss: 43140.5546875, time: 9.5367431640625e-07
Epoch: 8, loss: 42066.09240722656, time: 9.5367431640625e-07
Epoch: 9, loss: 41433.55712890625, time: 1.1920928955078125e-06


# Compare new and old embeddings

In [10]:
import json

# Load the embeddings exported by the baseline TransE run.
# json.load parses straight from the file handle, and the handle is closed as
# soon as parsing is done; the lookups below work on the in-memory dict.
with open("./res/embedding.vec.json", "r") as f:
    old_embeddings = json.load(f)

# Entity and relation tables are stored under separate keys of the same file.
old_ent_embeddings = old_embeddings["ent_embeddings"]
old_rel_embeddings = old_embeddings["rel_embeddings"]


In [11]:
# Load the embeddings exported by the frozen (TransE_freeze) run.
# json.load parses straight from the file handle, and the handle is closed as
# soon as parsing is done; the lookups below work on the in-memory dict.
with open("./res/embedding_new.vec.json", "r") as f:
    new_embeddings = json.load(f)

# Entity and relation tables are stored under separate keys of the same file.
new_ent_embeddings = new_embeddings["ent_embeddings"]
new_rel_embeddings = new_embeddings["rel_embeddings"]

In [14]:
# Freeze check on entities: is row 0 identical before and after the frozen
# run? Only this single row is compared, not the whole table.
first_ent_before = old_ent_embeddings[0]
first_ent_after = new_ent_embeddings[0]
first_ent_before == first_ent_after

True

In [15]:
# Freeze check on relations: is row 0 identical before and after the frozen
# run? Only this single row is compared, not the whole table.
# NOTE(review): the recorded result is False while the entity check is True,
# i.e. relation row 0 changed during the frozen run - confirm whether relation
# embeddings were meant to be frozen as well.
first_rel_before = old_rel_embeddings[0]
first_rel_after = new_rel_embeddings[0]
first_rel_before == first_rel_after

False