In [1]:
from keras.layers import Input, Embedding, Reshape, Dot, Activation, Flatten, BatchNormalization
from keras.models import Model
import numpy as np

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [8]:
# --- Hyper-parameters -------------------------------------------------------
WORDS_COUNT = 133778  # input_dim of the shared word embedding
EMB_OUT_DIM = 32      # length of one word vector
REL_COUNT = 8         # input_dim of the relation embedding

# --- Inputs: a single int32 id per sample for word 1, word 2 and the relation
input_words_1 = Input(name='input_words_1', shape=(1,), dtype='int32')
input_words_2 = Input(name='input_words_2', shape=(1,), dtype='int32')
input_rels = Input(name='input_rels', shape=(1,), dtype='int32')

# --- Word embeddings: one layer applied to both word inputs, so both words
# share the same embedding weights.
words_emb_layer = Embedding(WORDS_COUNT, EMB_OUT_DIM)
words_emb_1 = words_emb_layer(input_words_1)
words_emb_2 = words_emb_layer(input_words_2)

# --- Relation embedding: each relation id maps to EMB_OUT_DIM * EMB_OUT_DIM
# values, which are then viewed as a square matrix.
rels_emb_layer = Embedding(REL_COUNT, EMB_OUT_DIM * EMB_OUT_DIM, input_length=1)
rels_emb = rels_emb_layer(input_rels)
rels_matrix = Reshape((EMB_OUT_DIM, EMB_OUT_DIM))(rels_emb)

# --- Bilinear score: combine word 1 with the relation matrix, then with word 2.
# Both Dot layers contract over axis 2 of their two operands.
dot_w1_rel = Dot(axes=2)([words_emb_1, rels_matrix])
dot_w1_rel_w2 = Dot(axes=2)([dot_w1_rel, words_emb_2])

# --- Collapse the result to one value per sample (a reshape, not a transpose).
dot_result = Reshape((1,))(dot_w1_rel_w2)

# --- Output: squash the score with a sigmoid.
output_layer = Activation('sigmoid')(dot_result)

In [9]:
# Wire the three id inputs to the sigmoid output and configure training.
model = Model(
    inputs=[input_words_1, input_words_2, input_rels],
    outputs=[output_layer],
)
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)


In [10]:
# Read the training samples from "<WORDS_COUNT>.set".
# Each non-blank line holds four whitespace-separated fields, in this order:
#   word-1 id, relation id, word-2 id, label
# Every value is stored as a one-element row so the lists convert directly to
# column arrays of shape (n_samples, 1).
W1 = []
W2 = []
R = []
Y = []

with open(str(WORDS_COUNT) + ".set") as fp:
    for line in fp:
        fields = line.split()
        if not fields:
            # Tolerate blank lines (e.g. a trailing newline at end of file)
            # instead of failing on the 4-way unpack below.
            continue
        w1, r, w2, y = fields
        # Parse to numbers here so the arrays built from these lists are
        # numeric rather than unicode strings.
        # NOTE(review): assumes the three id fields are integer tokens (they
        # feed int32 inputs) and the label is numeric -- confirm against the
        # data file.
        W1.append([int(w1)])
        W2.append([int(w2)])
        R.append([int(r)])
        Y.append([float(y)])

In [11]:
# Convert each per-column list into a NumPy array in one pass.
W1, W2, R, Y = (np.asarray(column) for column in (W1, W2, R, Y))

In [12]:
from keras.callbacks import CSVLogger, Callback
import csv

class BatchHistory(Callback):
    """Keras callback that records per-batch metrics and writes them to CSV.

    After every batch a ``(batch, loss, accuracy)`` tuple is appended to
    ``self.bla``. When training ends the collected rows are written to
    ``batch.logs.csv`` (comma-separated, no header).

    Note: ``batch`` is the index Keras passes to ``on_batch_end``; it is stored
    as-is, so rows from different epochs are not distinguished by this column.
    """

    def on_train_begin(self, logs=None):
        """Reset the record list at the start of each training run."""
        self.bla = []

    def on_batch_end(self, batch, logs=None):
        """Append ``(batch, loss, accuracy)`` for the batch that just finished.

        The accuracy metric is looked up under ``'acc'`` first and then under
        ``'accuracy'``, since the key name depends on the Keras version.
        Metrics that are missing are stored as NaN so the final
        ``np.savetxt`` call always receives a purely numeric table.
        """
        logs = logs or {}
        loss = logs.get('loss')
        acc = logs.get('acc', logs.get('accuracy'))
        missing = float('nan')
        self.bla.append((
            batch,
            missing if loss is None else loss,
            missing if acc is None else acc,
        ))

    def on_train_end(self, logs=None):
        """Write all collected rows to ``batch.logs.csv``."""
        FILE = 'batch.logs.csv'
        np.savetxt(FILE, self.bla, delimiter=',')

In [13]:
EPOCHS = 3

# Callbacks: CSVLogger writes to 'training.log'; BatchHistory collects
# per-batch loss/accuracy.
csv_logger = CSVLogger('training.log')
batch_hist = BatchHistory()

# Left as the cell's last bare expression so the returned History object is
# displayed as the cell output.
model.fit(
    x=[W1, W2, R],
    y=Y,
    batch_size=32,
    epochs=EPOCHS,
    validation_split=0.2,
    callbacks=[csv_logger, batch_hist],
)

Train on 422976 samples, validate on 105744 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x7f9ce393a9e8>

In [None]:
# The file name encodes the vocabulary size and the number of epochs trained.
save_path = "{0} [{1}].hd5".format(WORDS_COUNT, EPOCHS)
model.save(save_path)