In [1]:
from model import *
from reader import *

In [2]:
# Shared option container; the dataset cells below overwrite its fields
# before handing it to a model constructor.
opts = Options()
# TensorFlow session configuration. `allow_growth` asks TF to grow its GPU
# memory allocation on demand rather than claiming it all at start-up.
config = tf.ConfigProto()
config.gpu_options.allow_growth = True

# Notebook-wide session. The evaluation helpers defined further down call
# `sess.run` on this global directly.
sess = tf.InteractiveSession(config=config)
# Switch read by joint_eval: when True, entity scores are multiplied by
# relation scores before ranking.
use_jeval = True

In [None]:
# Dataset cell: FB15k-237.
# Every dataset cell rebinds the same `file_name`, `opts` fields and `model`,
# so whichever one was run last is the one the rest of the notebook uses.

# Prefix of the CSV files that handle_eval writes under results/.
file_name = '237-dskg-hs512'

opts.data_path = 'data/FB15k-237/'

opts.hidden_size = 512
opts.num_samples = 2048*3
opts.keep_prob = 0.5
opts.num_layers = 2
opts.learning_rate=0.001

model = FBRespective(opts, sess)
# Uncomment to load weights from a previously saved checkpoint.
# model.saver.restore(save_path='ckpt/237-dskg-hs512_4', sess= sess)

In [None]:
# Dataset cell: run tagged 'fb-dskg-hs512'.
# NOTE(review): unlike the other dataset cells, this one never assigns
# `opts.data_path`. It therefore uses whatever `Options()` defaults to, or the
# path left behind by a previously executed dataset cell -- confirm that the
# intended dataset is actually loaded.

# Prefix of the CSV files that handle_eval writes under results/.
file_name = 'fb-dskg-hs512'

opts.hidden_size = 512
opts.num_samples = 2048*3
opts.keep_prob = 0.5
opts.num_layers = 2
opts.learning_rate=0.001

model = FBRespective(opts, sess)
# Uncomment to load weights from a previously saved checkpoint.
# model.saver.restore(save_path='ckpt/fb-dskg-hs512_4', sess= sess)

In [None]:
# Dataset cell: WordNet (data/wordnet-mlj12/), using the WNRespective model
# class instead of FBRespective.
# Every dataset cell rebinds the same `file_name`, `opts` fields and `model`,
# so whichever one was run last is the one the rest of the notebook uses.

# Prefix of the CSV files that handle_eval writes under results/.
file_name = 'wn-dskg-hs512'

opts.hidden_size = 512
opts.num_samples = 2048*3
opts.keep_prob = 0.5
opts.num_layers = 2
opts.learning_rate=0.001

opts.data_path = 'data/wordnet-mlj12/'

model = WNRespective(opts, sess)


In [4]:
def cal_ranks(probs, method, label):
    """Return, for every row of `probs`, the rank of that row's labelled column.

    Args:
        probs: 2-D array of scores, one row per query and one column per
            candidate. Higher scores rank first.
        method: Tie-breaking rule. 'min' is computed directly with numpy as
            one plus the number of candidates scoring strictly higher than the
            labelled one; any other value is passed to
            `pandas.DataFrame.rank(method=...)`.
        label: 1-D integer sequence giving the target column of each row.

    Returns:
        1-D array with one rank per row (1 is best).
    """
    row_index = range(len(label))

    if method == 'min':
        # Shift each row so the labelled score sits at zero; strictly positive
        # entries are exactly the candidates that beat it.
        target_scores = probs[row_index, label].reshape(len(probs), 1)
        return ((probs - target_scores) > 0).sum(axis=1) + 1

    ranked = pd.DataFrame(probs).rank(axis=1, ascending=False, method=method)
    return ranked.values[row_index, label]

def cal_performance(ranks, top=10):
    """Summarise a set of ranks as mean rank, Hits@top and mean reciprocal rank.

    Args:
        ranks: 1-D sequence of ranks (numpy array, list or tuple; 1 is best).
        top: Cut-off for the hits metric; a rank counts as a hit when
            `rank <= top`.

    Returns:
        Tuple `(mean_rank, hits_at_top, mrr)` of floats.

    Raises:
        ValueError: If `ranks` is empty, since none of the metrics is defined.
    """
    # Converting once lets plain Python sequences work and keeps the
    # reductions vectorised instead of iterating element by element with the
    # builtin sum().
    ranks = np.asarray(ranks, dtype=float)
    if ranks.size == 0:
        raise ValueError('cal_performance requires at least one rank')

    m_r = ranks.mean()
    h_10 = (ranks <= top).mean()
    mrr = (1. / ranks).mean()
    return m_r, h_10, mrr

def eval_entity_prediction(model, data, filter_mat, method='min', return_ranks=False, return_probs=False, return_label_probs=False):
    """Run the model's entity-prediction output over `data` and score it.

    Column 0 and column 1 of `data` are fed to `model._eval_e` and
    `model._eval_r`; column 2 is used as the index of the correct entity.
    Inference runs on the notebook-level `sess`.

    Args:
        model: Object exposing `_options.batch_size`, `padding_data`,
            `_eval_e`, `_eval_r` and `_entity_probs`.
        data: 2-D integer array with at least three columns.
        filter_mat: Array multiplied element-wise into the scores to obtain
            the filtered scores (presumably zero for candidates that should be
            ignored -- confirm against the model). Not used when
            `return_probs` or `return_label_probs` is set.
        method: Tie-breaking rule forwarded to `cal_ranks`.
        return_ranks: Return only the filtered ranks.
        return_probs: Return the raw score matrix.
        return_label_probs: Return only the score of each correct entity.
            Takes precedence over `return_probs`.

    Returns:
        Depending on the flags: the label scores, the score matrix, the
        filtered ranks, or the tuple
        `(m_r, h_10, mrr, f_m_r, f_h_10, f_mrr)` of raw and filtered metrics.
    """
    batch_size = model._options.batch_size

    # Labels are taken before padding so they line up with the trimmed scores.
    label = data[:, 2]
    row_index = range(len(label))

    padded, padding_num = model.padding_data(data)
    head_ph, rel_ph, entity_probs_op = model._eval_e, model._eval_r, model._entity_probs
    heads, relations = padded[:, 0], padded[:, 1]

    batch_scores = []
    for batch in range(len(padded) // batch_size):
        window = slice(batch * batch_size, (batch + 1) * batch_size)
        feed = {head_ph: heads[window], rel_ph: relations[window]}
        batch_scores.append(sess.run(entity_probs_op, feed))
    # Cut off the trailing `padding_num` rows reported by padding_data.
    probs = np.concatenate(batch_scores)[:len(padded) - padding_num]

    if return_label_probs:
        return probs[row_index, label]
    if return_probs:
        return probs

    # Apply the filter, then restore the correct entity's own score so the
    # filter can never remove the target itself.
    filter_probs = probs * filter_mat
    filter_probs[row_index, label] = probs[row_index, label]

    filter_ranks = cal_ranks(filter_probs, method=method, label=label)
    if return_ranks:
        return filter_ranks

    raw_ranks = cal_ranks(probs, method=method, label=label)
    raw_metrics = cal_performance(raw_ranks)
    filtered_metrics = cal_performance(filter_ranks)
    return tuple(raw_metrics) + tuple(filtered_metrics)

def eval_relation_prediction(model, data, filter_mat, method='min', return_ranks=False, return_probs=False):
    """Run the model's relation-prediction output for the entities in `data`.

    Only column 0 of `data` is read; it is fed to `model._eval_e` in batches
    and `model._relation_probs` is fetched on the notebook-level `sess`.

    Args:
        model: Object exposing `_options.batch_size`, `padding_data`,
            `_eval_e` and `_relation_probs`.
        data: 2-D integer array whose first column holds entity ids.
        filter_mat, method, return_ranks, return_probs: Accepted so the
            signature parallels `eval_entity_prediction`, but ignored -- this
            function always returns the raw scores.

    Returns:
        Array with one row of relation scores per input row, with the
        trailing `padding_num` rows reported by `padding_data` trimmed off.
    """
    batch_size = model._options.batch_size

    data, padding_num = model.padding_data(data)
    num_batch = len(data) // batch_size

    e_placeholder, fetch_relation_probs = model._eval_e, model._relation_probs
    entities = data[:, 0]

    probs = [
        sess.run(
            fetch_relation_probs,
            {e_placeholder: entities[i * batch_size:(i + 1) * batch_size]},
        )
        for i in range(num_batch)
    ]
    return np.concatenate(probs)[:len(data) - padding_num]


In [5]:
# Filter matrices used when ranking tail predictions on the test / validation split.
filter_mat = model._tail_test_filter_mat
vfilter_mat = model._tail_valid_filter_mat

# Id triples of every split as plain arrays with columns (h_id, r_id, t_id).
train_data = model._train_data[['h_id', 'r_id', 't_id']].values
valid_data = model._valid_data[['h_id', 'r_id', 't_id']].values
test_data = np.array(model._test_data[['h_id', 'r_id', 't_id']].values)

# Held-out triples (test followed by validation), and all triples with the
# training split in front.
p_data = np.concatenate((test_data, valid_data))
all_data = np.concatenate((train_data, p_data))

def gen_rev_rel(test_data):
    """Return the relation column of `test_data` with its two halves swapped.

    The rows are split at `len(test_data) // 2`. The result lists the relation
    ids (column 1) of the second part first, followed by those of the first
    part, so entry k holds the relation of the row half a dataset away.
    Presumably the two halves hold forward triples and their reverses in
    matching order -- verify against the data reader.

    Args:
        test_data: 2-D array whose column 1 holds relation ids.

    Returns:
        1-D array of relation ids with the same length as `test_data`.
    """
    split = len(test_data) // 2
    relations = test_data[:, 1]
    return np.concatenate((relations[split:], relations[:split]))

# Half-swapped relation ids for the validation and test splits (see gen_rev_rel).
vrev_rel = gen_rev_rel(valid_data)
rev_rel = gen_rev_rel(test_data)

# One row per entity id, with the id repeated in both columns; joint_eval
# passes this to eval_relation_prediction to get relation scores for every entity.
rev_rel_test_data = np.stack([np.arange(model._entity_num)] * 2, axis=1)

In [6]:
def cal_r(probs, label, filter_mat):
    """Compute filtered 'min' ranks of the labelled column of each row.

    Args:
        probs: 2-D score array, one row per query.
        label: 1-D integer sequence with the target column of each row.
        filter_mat: Array multiplied element-wise into `probs` before ranking.

    Returns:
        1-D array of ranks as produced by `cal_ranks(..., method='min')`.
    """
    row_index = range(len(label))
    masked = probs * filter_mat
    # Put the target's own score back so the filter cannot remove it.
    masked[row_index, label] = probs[row_index, label]
    return cal_ranks(masked, method='min', label=label)


def joint_eval(test_data, filter_mat, rev_rel, rel_weight=0.33):
    """Rank tail predictions with and without the relation-score re-weighting.

    Reads the notebook globals `model`, `use_jeval` and `rev_rel_test_data`.

    Args:
        test_data: 2-D triple array; column 2 is the correct tail id.
        filter_mat: Filter matrix forwarded to `cal_r`.
        rev_rel: For each row of `test_data`, the relation id whose score row
            is used to re-weight the candidates (see `gen_rev_rel`).
        rel_weight: Exponent applied to the relation scores before they are
            multiplied into the entity scores. Defaults to 0.33, the value
            that was previously hard-coded.

    Returns:
        Tuple `(joint_fr, efr)`: filtered ranks from the combined scores and
        from the entity scores alone. When `use_jeval` is false both elements
        are the entity-only ranks.
    """
    label = test_data[:, 2]

    ep = eval_entity_prediction(model, data=test_data, filter_mat=filter_mat, return_probs=True)
    efr = cal_r(ep, label, filter_mat)
    if not use_jeval:
        return efr, efr

    # Relation scores for every entity, transposed so that indexing with a
    # relation id yields that relation's score for each entity.
    rp = eval_relation_prediction(model, rev_rel_test_data, filter_mat=None, return_probs=True).T
    rp = rp ** rel_weight
    # One row of per-entity weights for each evaluated triple.
    joint_probs = ep * rp[rev_rel]
    joint_fr = cal_r(joint_probs, label, filter_mat)
    return joint_fr, efr

def process_ranks(efr, i=0, last_mean_loss=1000):
    """Print and return the summary metrics for one set of ranks.

    Args:
        efr: Sequence of ranks to summarise.
        i: Epoch number to report.
        last_mean_loss: Training loss to report alongside the metrics.

    Returns:
        Tuple `(i, Hits@1, Hits@10, MR, MRR, last_mean_loss)`.
    """
    # Mean rank and MRR do not depend on `top`; only the hits value differs
    # between the two calls.
    MR, H1, MRR = cal_performance(efr, top=1)
    H10 = cal_performance(efr, top=10)[1]

    summary = (i, H1, H10, MR, MRR, last_mean_loss)
    print('epoch:%i, Hits@1:%.3f, Hits@10:%.3f, MR:%.3f, MRR:%.3f, mean_loss:%.3f' % summary)
    return summary

def handle_eval(i=0, last_mean_loss=1000, valid=True, test=True):
    """Evaluate the current model and record the metrics.

    For each requested split, two rows are appended to the matching global
    history list (`valid_results` / `results`): first the entity-only
    metrics, then the joint metrics. Whenever `i` is a multiple of 50 the
    whole history is written to 'results/<file_name>valid' or
    'results/<file_name>test'.

    Args:
        i: Epoch number being evaluated.
        last_mean_loss: Most recent training loss, stored with the metrics.
        valid: Evaluate the validation split.
        test: Evaluate the test split.
    """
    columns = ['epoch','Hits@1', 'Hits@10', 'MR', 'MRR', 'mean_loss']

    def evaluate_split(data, split_filter, split_rev_rel, history, suffix):
        # Shared body of both splits: evaluate, log, and periodically dump to CSV.
        jfr, efr = joint_eval(test_data=data, filter_mat=split_filter, rev_rel=split_rev_rel)
        msg = process_ranks(efr, i, last_mean_loss)
        jmsg = process_ranks(jfr, i, last_mean_loss)
        history.extend([msg, jmsg])
        if i % 50 == 0:
            pd.DataFrame(history, columns=columns).to_csv('results/' + file_name + suffix)

    # Globals are looked up inside each branch, so a disabled split never
    # touches its data or history list.
    if valid:
        evaluate_split(valid_data, vfilter_mat, vrev_rel, valid_results, 'valid')

    if test:
        evaluate_split(test_data, filter_mat, rev_rel, results, 'test')
    return

In [None]:
# One-off evaluation of the current model on the test split; prints the
# entity-only metrics followed by the joint metrics (epoch and loss reported as 0).
jfr, efr = joint_eval(test_data, filter_mat, rev_rel)
msg = process_ranks(efr, i=0, last_mean_loss=0)
jmsg = process_ranks(jfr, i=0, last_mean_loss=0)

In [None]:
# Training state. Re-running this cell resets the epoch counter and clears
# the metric histories that handle_eval appends to.
epoch, last_mean_loss = 0, 1000
results, valid_results = [], []

In [None]:
# Train through epoch 504, evaluating both splits before every fifth epoch.
# `epoch` always holds the next epoch to run, so re-running this cell after an
# interruption resumes where the loop stopped.
for i in range(epoch, 505):
    if not i % 5:
        handle_eval(i=i, last_mean_loss=last_mean_loss, valid=True, test=True)
    last_mean_loss = model.train()
    epoch = i + 1