### Train the inductive link prediction model

In [1]:
#name of the dataset folder; used below to build the ../data/<data_name>/ file paths
#and as part of every name under which results are saved
data_name = 'WN18RR_v4'
#identifier of this model variant; also becomes part of the save names
model_id = 'SiaLP_3_new'
#minimum path length (used as lower_bd when the path-based model is trained)
lower_bound = 1
#maximum path length for the path-based model (used as upper_bd for that training)
upper_bound_path = 10
#maximum path length for the subgraph-based model
#NOTE(review): not referenced in the visible part of the notebook -- presumably used by a later cell
upper_bound_subg = 3

In [2]:
#define the names under which the two models and the ID dictionaries are saved;
#each name is "<prefix>_<model_id>_<data_name>"
model_name = f'Model_{model_id}_{data_name}'
one_hop_model_name = f'One_hop_model_{model_id}_{data_name}'
ids_name = f'IDs_{model_id}_{data_name}'

In [3]:
import librosa
import opensmile
import os
import sys
import numpy as np
import random
import pickle

from collections import defaultdict
from copy import deepcopy
from sklearn.utils import shuffle
from sys import getsizeof

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import Model
from tensorflow.keras import initializers
from tensorflow.keras.utils import plot_model

In [4]:
class LoadKG:
    """Load a knowledge-graph triple file into ID-based lookup structures."""

    def __init__(self):

        #placeholder attribute; it is not read anywhere in this class
        self.x = 'Hello'

    def load_train_data(self, data_path, one_hop, data, s_t_r, entity2id, id2entity,
                     relation2id, id2relation):
        """Read whitespace-separated (source, relation, target) triples and fill the containers.

        All containers are updated in place; nothing is returned.

        Args:
            data_path: path of a text file with one triple per line. Blank lines are skipped.
            one_hop: dict, entity id -> neighbours. While loading, each value is a set of
                (relation id, neighbour id) tuples; before returning, every value is turned
                into a list. Because of that final conversion, a dict filled by an earlier
                call cannot be passed in again.
            data: set receiving the forward triples as (s_id, r_id, t_id).
            s_t_r: mapping (s_id, t_id) -> set of relation ids; it is indexed without a prior
                membership test, so it must create missing entries itself
                (e.g. ``defaultdict(set)``).
            entity2id, id2entity: entity name <-> id dictionaries, extended with new entities.
            relation2id, id2relation: relation name <-> id dictionaries, extended with every
                new relation immediately followed by its '_inverse_' counterpart.

        Raises:
            ValueError: if a non-blank line has fewer than three fields.
        """
        data_ = set()

        ####load the train, valid and test set##########
        with open(data_path, 'r') as f:

            for line_no, line in enumerate(f, start=1):

                fields = line.split()

                #a blank (e.g. trailing) line carries no triple
                if not fields:
                    continue

                if len(fields) < 3:
                    raise ValueError('malformed triple on line %d of %s: %r'
                                     % (line_no, data_path, line))

                data_.add(tuple(fields))

        ####relation dict#################
        index = len(relation2id)

        for key in data_:

            relation = key[1]

            if relation not in relation2id:

                relation2id[relation] = index
                id2relation[index] = relation
                index += 1

                #the inverse relation always takes the id right after its initial relation
                iv_r = '_inverse_' + relation
                relation2id[iv_r] = index
                id2relation[index] = iv_r
                index += 1

        #get the id of the inverse relation: ids are handed out in (initial, inverse) pairs,
        #so as long as relation2id held an even number of entries on entry (true when it is
        #only ever filled by this method) an initial relation has an even id and its inverse
        #the following odd id.
        def inverse_r(r):

            return r + 1 if r % 2 == 0 else r - 1

        ####entity dict###################
        index = len(entity2id)

        for key in data_:

            for entity in (key[0], key[2]):

                if entity not in entity2id:

                    entity2id[entity] = index
                    id2entity[index] = entity
                    index += 1

        #create the set of triples using id instead of string
        for ele in data_:

            s = entity2id[ele[0]]
            r = relation2id[ele[1]]
            t = entity2id[ele[2]]
            r_inv = inverse_r(r)

            data.add((s, r, t))

            #forward link s -> t
            s_t_r[(s, t)].add(r)
            one_hop.setdefault(s, set()).add((r, t))

            #backward link t -> s, labelled with the inverse relation
            s_t_r[(t, s)].add(r_inv)
            one_hop.setdefault(t, set()).add((r_inv, s))

        #change each set in one_hop to list
        for e in one_hop:

            one_hop[e] = list(one_hop[e])

In [5]:
class ObtainPathsByDynamicProgramming:
    """Enumerate relation paths starting from an entity with a bounded, randomised recursion."""

    def __init__(self, amount_bd=50, size_bd=50, threshold=20000):

        self.amount_bd = amount_bd #how many Tuples we choose in one_hop[node] for next recursion

        self.size_bd = size_bd #size bound limit the number of paths to a target entity t

        #number of times paths with specific length been performed for recursion
        self.threshold = threshold

    def obtain_paths(self, mode, s, t_input, lower_bd, upper_bd, one_hop):
        """Find relation paths from entity ``s`` to other entities, using recursion.

        One may refer to LeetCode Problem 797 for details:
            https://leetcode.com/problems/all-paths-from-source-to-target/

        Args:
            mode: which end entities are recorded --
                'direct_neighbour': only the direct neighbours of ``s``;
                'target_specified': only ``t_input``;
                'any_target': every key of ``one_hop``.
            s: start entity; must be a key of ``one_hop``.
            t_input: the target entity, only read in 'target_specified' mode.
            lower_bd, upper_bd: inclusive bounds on the recorded path length; both must be
                of type int, >= 1, and lower_bd <= upper_bd.
            one_hop: dict, entity -> indexable sequence of (relation, neighbour) tuples.

        Returns:
            (res, count_dict): ``res`` maps each reached qualified entity to a set of paths,
            every path being a tuple of relations; ``count_dict`` maps a path length to the
            number of (relation, neighbour) tuples that were examined at that length.

        Raises:
            TypeError: invalid bounds (the exception type is kept for existing callers).
            ValueError: ``s`` is not in ``one_hop`` or ``mode`` is unknown.
        """
        #exact type test on purpose: bool and float bounds are rejected
        if type(lower_bd) is not int or lower_bd < 1:

            raise TypeError("!!! invalid lower bound setting, must >= 1 !!!")

        if type(upper_bd) is not int or upper_bd < 1:

            raise TypeError("!!! invalid upper bound setting, must >= 1 !!!")

        if lower_bd > upper_bd:

            raise TypeError("!!! lower bound must not exced upper bound !!!")

        if s not in one_hop:

            raise ValueError('!!! entity not in one_hop. Please work on existing entities')

        #here is the result dict. Its key is each entity t sharing paths from s
        #The value of each t is a set containing the paths from s to t
        #These paths can be either the direct connection r, or a multi-hop path
        res = defaultdict(set)

        #qualified_t contains the entities t that may be added to the result
        if mode == 'direct_neighbour':

            qualified_t = {Tuple[1] for Tuple in one_hop[s]}

        elif mode == 'target_specified':

            qualified_t = {t_input}

        elif mode == 'any_target':

            qualified_t = set(one_hop)

        else:

            raise ValueError('not a valid mode')

        #number of (r,t) tuples examined so far, per path length
        count_dict = defaultdict(int)

        #We use recursion to find the paths.
        #On current node with the path [r1, ..., rk] and on-path entities {s, e1, ..., ek-1, node}
        #from s to this node, we further look at the direct neighbours t' of this node.
        #If t' is not an on-path entity, we recursively proceed to t'.
        def helper(node, path, on_path_en):

            depth = len(path)

            #record the path when its length is within the bounds, the node is a qualified
            #target and the node has not yet collected size_bd paths
            if (lower_bd <= depth <= upper_bd) and (node in qualified_t) and (
                len(res[node]) < self.size_bd):

                res[node].add(tuple(path))

            #won't start new recursions if the current path length already reaches upper limit
            #or the number of recursions performed on this length has reached the limit
            if (depth < upper_bd) and (count_dict[depth] <= self.threshold):

                #visit the neighbours in random order ...
                order = list(range(len(one_hop[node])))
                random.shuffle(order)

                #... and follow at most amount_bd of them
                for i in order[:self.amount_bd]:

                    Tuple = one_hop[node][i]
                    r, t = Tuple[0], Tuple[1]

                    #counted even if eventually this step does not proceed
                    count_dict[depth] += 1

                    #proceed only if t is not on the path and the budget is not exhausted
                    if (t not in on_path_en) and (count_dict[depth] <= self.threshold):

                        helper(t, path + [r], on_path_en.union({t}))

        helper(s, [], {s})

        return(res, count_dict)

In [6]:
#locations of the three splits of the chosen dataset
train_path = f'../data/{data_name}/train.txt'
valid_path = f'../data/{data_name}/valid.txt'
test_path = f'../data/{data_name}/test.txt'

In [7]:
#instantiate the helpers: Class_1 loads the triple files,
#Class_2 runs the path search (with its default bounds)
Class_1, Class_2 = LoadKG(), ObtainPathsByDynamicProgramming()

In [8]:
#containers describing the training graph; load_train_data fills them in place
one_hop = {}
data = set()
s_t_r = defaultdict(set)

#name <-> id dictionaries, shared by the train, valid and test splits
entity2id, id2entity = {}, {}
relation2id, id2relation = {}, {}

#read the training triples
Class_1.load_train_data(
    train_path, one_hop, data, s_t_r,
    entity2id, id2entity, relation2id, id2relation)

In [9]:
#separate graph containers for the validation split
one_hop_valid = {}
data_valid = set()
s_t_r_valid = defaultdict(set)

#read the validation triples; the shared ID dictionaries are reused
#(and extended when the split contains unseen entities or relations)
Class_1.load_train_data(
    valid_path, one_hop_valid, data_valid, s_t_r_valid,
    entity2id, id2entity, relation2id, id2relation)

In [10]:
#separate graph containers for the test split
one_hop_test = {}
data_test = set()
s_t_r_test = defaultdict(set)

#read the test triples; the shared ID dictionaries are reused
#(and extended when the split contains unseen entities or relations)
Class_1.load_train_data(
    test_path, one_hop_test, data_test, s_t_r_test,
    entity2id, id2entity, relation2id, id2relation)

#### Build the path-based siamese neural network structure

We use biLSTM to train on the input path embedding sequence to predict the output embedding or the relation.

In [11]:
# Path-based siamese network: three relation paths between the same entity pair go through
# shared embedding/LSTM weights, and the result is compared with a relation embedding.

# Input layers: each path is a sequence of integer relation IDs (variable length)
fst_path = keras.Input(shape=(None,), dtype="int32")
scd_path = keras.Input(shape=(None,), dtype="int32")
thd_path = keras.Input(shape=(None,), dtype="int32")

# The candidate relation (the batch builders feed a single ID per sample, i.e. [r])
id_rela = keras.Input(shape=(None,), dtype="int32")

# Embed each relation ID in a 300-dimensional vector.
# The vocabulary has one extra "space holder" entry (ID len(relation2id)),
# which pads paths that are shorter than the maximum length.
# NOTE(review): the padding ID is not masked, so the LSTMs also read the padded steps.
in_embd_var = layers.Embedding(len(relation2id)+1, 300)

# Embed the three paths with the same (shared) embedding layer
fst_p_embd = in_embd_var(fst_path)
scd_p_embd = in_embd_var(scd_path)
thd_p_embd = in_embd_var(thd_path)

# The candidate relation gets its own, separate 300-dimensional embedding table
rela_embd = layers.Embedding(len(relation2id)+1, 300)(id_rela)

# Two stacked bi-directional LSTM layers, shared by all three paths
lstm_layer_1 = layers.Bidirectional(layers.LSTM(150, return_sequences=True))
lstm_layer_2 = layers.Bidirectional(layers.LSTM(150, return_sequences=True))

#first LSTM layer
fst_lstm_mid = lstm_layer_1(fst_p_embd)
scd_lstm_mid = lstm_layer_1(scd_p_embd)
thd_lstm_mid = lstm_layer_1(thd_p_embd)

#second LSTM layer
fst_lstm_out = lstm_layer_2(fst_lstm_mid)
scd_lstm_out = lstm_layer_2(scd_lstm_mid)
thd_lstm_out = lstm_layer_2(thd_lstm_mid)

#max-pool over the step axis (axis 1) to get one vector per path
fst_reduce_max = tf.reduce_max(fst_lstm_out, axis=1)
scd_reduce_max = tf.reduce_max(scd_lstm_out, axis=1)
thd_reduce_max = tf.reduce_max(thd_lstm_out, axis=1)

#concatenate the output vectors of the three siamese channels: (Batch, 900)
path_concat = layers.concatenate([fst_reduce_max, scd_reduce_max, thd_reduce_max], axis=-1)

#add dropout on top of the concatenation from all channels
dropout = layers.Dropout(0.25)(path_concat)

#project to the output embedding size with a dense layer: (Batch, 300)
path_out_vect = layers.Dense(300, activation='tanh')(dropout)

#sum over the step axis of the relation embedding; with a single relation ID per sample
#this only removes that axis
rela_out_embd = tf.reduce_sum(rela_embd, axis=1)

# Normalize the vectors to have unit length
path_out_vect_norm = tf.math.l2_normalize(path_out_vect, axis=-1)
rela_out_embd_norm = tf.math.l2_normalize(rela_out_embd, axis=-1)

# Dot product of the two unit vectors, i.e. their cosine similarity
dot_product = layers.Dot(axes=-1)([path_out_vect_norm, rela_out_embd_norm])

#put together the model: inputs are (path 1, path 2, path 3, relation)
model = keras.Model([fst_path, scd_path, thd_path, id_rela], dot_product)

2023-05-15 13:37:22.889519: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [12]:
#Adam optimizer for the path-based model
#NOTE(review): recent TensorFlow releases accept `decay` only on the legacy optimizers --
#confirm against the installed version
opt = keras.optimizers.Adam(
    learning_rate=0.0005,
    decay=1e-6,
)

#compile the model
#NOTE(review): the model output is a dot product of two unit vectors and can therefore be
#negative, while binary cross-entropy is normally fed probabilities -- confirm this is intended
model.compile(
    optimizer=opt,
    loss='binary_crossentropy',
    metrics=['binary_accuracy'],
)

#### Build the subgraph-based siamese neural network

In [13]:
# Subgraph-based siamese network: three out-reaching relation paths of the source entity and
# three of the target entity go through shared embedding/LSTM weights; the combined vector
# is compared with a relation embedding.

#each input is a sequence of integer relation IDs (one path, variable length)
source_path_1 = keras.Input(shape=(None,), dtype="int32")
source_path_2 = keras.Input(shape=(None,), dtype="int32")
source_path_3 = keras.Input(shape=(None,), dtype="int32")

target_path_1 = keras.Input(shape=(None,), dtype="int32")
target_path_2 = keras.Input(shape=(None,), dtype="int32")
target_path_3 = keras.Input(shape=(None,), dtype="int32")

#the candidate relation (the batch builder feeds a single ID per sample, i.e. [r])
id_rela_ = keras.Input(shape=(None,), dtype="int32")

# Embed each relation ID in a 300-dimensional vector.
# The vocabulary has one extra "space holder" entry (ID len(relation2id)),
# which pads paths that are shorter than the maximum length.
# NOTE(review): the padding ID is not masked, so the LSTMs also read the padded steps.
in_embd_var_ = layers.Embedding(len(relation2id)+1, 300)

# Obtain the source embeddings
source_embd_1 = in_embd_var_(source_path_1)
source_embd_2 = in_embd_var_(source_path_2)
source_embd_3 = in_embd_var_(source_path_3)

#Obtain the target embeddings (same embedding layer as the source paths)
target_embd_1 = in_embd_var_(target_path_1)
target_embd_2 = in_embd_var_(target_path_2)
target_embd_3 = in_embd_var_(target_path_3)

# The candidate relation gets its own, separate 300-dimensional embedding table
rela_embd_ = layers.Embedding(len(relation2id)+1, 300)(id_rela_)

#two stacked bi-directional LSTM layers, shared by all six paths
lstm_1 = layers.Bidirectional(layers.LSTM(150, return_sequences=True))
lstm_2 = layers.Bidirectional(layers.LSTM(150, return_sequences=True))

###source lstm implementation########
#first LSTM layer
source_mid_1 = lstm_1(source_embd_1)
source_mid_2 = lstm_1(source_embd_2)
source_mid_3 = lstm_1(source_embd_3)

#second LSTM layer
source_out_1 = lstm_2(source_mid_1)
source_out_2 = lstm_2(source_mid_2)
source_out_3 = lstm_2(source_mid_3)

#max-pool over the step axis (axis 1) to get one vector per path
source_max_1 = tf.reduce_max(source_out_1, axis=1)
source_max_2 = tf.reduce_max(source_out_2, axis=1)
source_max_3 = tf.reduce_max(source_out_3, axis=1)

#concatenate the output vectors of the three source channels: (Batch, 900)
source_concat = layers.concatenate([source_max_1, source_max_2, source_max_3], axis=-1)

#add dropout on top of the concatenation from all channels
source_dropout = layers.Dropout(0.25)(source_concat)

###target lstm implementation########
#first LSTM layer
target_mid_1 = lstm_1(target_embd_1)
target_mid_2 = lstm_1(target_embd_2)
target_mid_3 = lstm_1(target_embd_3)

#second LSTM layer
target_out_1 = lstm_2(target_mid_1)
target_out_2 = lstm_2(target_mid_2)
target_out_3 = lstm_2(target_mid_3)

#max-pool over the step axis (axis 1) to get one vector per path
target_max_1 = tf.reduce_max(target_out_1, axis=1)
target_max_2 = tf.reduce_max(target_out_2, axis=1)
target_max_3 = tf.reduce_max(target_out_3, axis=1)

#concatenate the output vectors of the three target channels: (Batch, 900)
target_concat = layers.concatenate([target_max_1, target_max_2, target_max_3], axis=-1)

#add dropout on top of the concatenation from all channels
target_dropout = layers.Dropout(0.25)(target_concat)

#further concatenate source and target output embeddings: (Batch, 1800)
final_concat = layers.concatenate([source_dropout, target_dropout], axis=-1)

#project to the output embedding size with a dense layer: (Batch, 300)
out_vect = layers.Dense(300, activation='tanh')(final_concat)

#sum over the step axis of the relation embedding; with a single relation ID per sample
#this only removes that axis
rela_out_embd_ = tf.reduce_sum(rela_embd_, axis=1)

# Normalize the vectors to have unit length
out_vect_norm = tf.math.l2_normalize(out_vect, axis=-1)
rela_out_embd_norm_ = tf.math.l2_normalize(rela_out_embd_, axis=-1)

# Dot product of the two unit vectors, i.e. their cosine similarity
dot_product_ = layers.Dot(axes=-1)([out_vect_norm, rela_out_embd_norm_])

#put together the model: inputs are (3 source paths, 3 target paths, relation)
model_2 = keras.Model([source_path_1, source_path_2, source_path_3,
                       target_path_1, target_path_2, target_path_3, id_rela_], dot_product_)

In [14]:
#Adam optimizer for the subgraph-based model
#NOTE(review): recent TensorFlow releases accept `decay` only on the legacy optimizers --
#confirm against the installed version
opt_ = keras.optimizers.Adam(
    learning_rate=0.0005,
    decay=1e-6,
)

#compile the model
#NOTE(review): the model output is a dot product of two unit vectors and can therefore be
#negative, while binary cross-entropy is normally fed probabilities -- confirm this is intended
model_2.compile(
    optimizer=opt_,
    loss='binary_crossentropy',
    metrics=['binary_accuracy'],
)

### Build the big-batch for path-based model
We will build the big-batch for the path-based model training. That is, we will build three lists to store the three paths, respectively.

In order to reduce computational complexity, we will run the path-finding algorithm for each entity e in the dataset before the training. That is, for each entity e, we will have two dictionaries. Dict 1 stores the paths between e and any other entities in the dataset, while Dict 2 stores the paths between e and its direct neighbors. The two dicts will be used, and remain unchanged, throughout the training.

* At each step, three different paths between two entities s and t are selected. Each path is appended to one of the lists.
* If this step is for positive samples, an existing relation r between s and t will be selected. If there is more than one relation from s to t, we randomly choose one. Also, 1 will be appended to the label list.
* If this step is for negative samples, one relation that does not exist between s and t will be selected randomly and appended to the relation list. Also, 0 will be appended to the label list.
* In practice, the positive step is always followed by a negative step. The same paths as in the positive step will be used in the next negative step, while the relation is a negative one chosen in the above way.
* We do this until the length limit is reached.

**For relation prediction, we will only need to train using the (s,r,t) triple. (t,r-1,s) is not necessary and hence not included in training.**

In [15]:
#function to build the big batch for path-based training
def build_big_batches_path(lower_bd, upper_bd, data, one_hop, s_t_r,
                      x_p_list, x_r_list, y_list,
                      relation2id, entity2id, id2relation, id2entity):
    """Append positive/negative training samples for the path-based model.

    For every entity s in one_hop, the paths from s to its direct neighbours are searched
    once (via the module-level Class_2). Then, ten times over, every neighbour t that has
    at least three paths yields one positive sample (three random paths + a true forward
    relation, label 1.) directly followed by one negative sample (the same three paths + a
    forward relation that does not hold between s and t, label 0.).

    Results are appended in place to x_p_list['1'|'2'|'3'] (paths, right-padded with the
    ID len(id2relation) up to upper_bd), x_r_list ([relation]) and y_list. Nothing is
    returned. data, relation2id and entity2id are accepted but not read.

    Raises:
        ValueError: if the IDs in id2relation are not exactly 0..len-1 in initial/inverse
            pairs, or if a reached neighbour has no relation recorded in s_t_r.
    """
    num_r = len(id2relation)

    #relation IDs must be contiguous, starting at 0
    for rel_id in range(num_r):
        if rel_id not in id2relation:
            raise ValueError('error when generaing id2relation')

    #initial (forward) relations are the ones with an even ID
    ini_r_id_set = {rel_id for rel_id in range(num_r) if rel_id % 2 == 0}
    num_ini_r = len(ini_r_id_set)

    if num_ini_r != int(num_r/2):
        raise ValueError('error when generating id2relation')

    #only entities that appear in one_hop can start a path search
    existing_ids = list({entity for entity in one_hop})
    random.shuffle(existing_ids)

    #right-pad a path with the space holder ID
    def padded(path):
        return list(path) + [num_r]*abs(len(path)-upper_bd)

    for count, s in enumerate(existing_ids, start=1):

        #run the path finding algorithm between s and its direct neighbours
        result, _ = Class_2.obtain_paths('direct_neighbour', s, 'nb', lower_bd, upper_bd, one_hop)

        for _ in range(10):

            for t in result:

                relations_s_t = s_t_r[(s,t)]

                if len(relations_s_t) == 0:
                    raise ValueError(s,t,id2entity[s], id2entity[t])

                #only the forward (even-ID) links matter for relation prediction
                ini_r_list = [rel for rel in relations_s_t if rel % 2 == 0]

                #skip t unless there are at least three paths, at least one forward relation,
                #and at least one forward relation left over to serve as a negative
                if len(result[t]) < 3:
                    continue
                if len(ini_r_list) == 0 or len(ini_r_list) >= int(num_ini_r):
                    continue

                path_1, path_2, path_3 = random.sample(list(result[t]), 3)

                #####positive#####################
                x_p_list['1'].append(padded(path_1))
                x_p_list['2'].append(padded(path_2))
                x_p_list['3'].append(padded(path_3))
                x_r_list.append([random.choice(ini_r_list)])
                y_list.append(1.)

                #####negative#####################
                #same paths, but a forward relation that does not hold between s and t
                x_p_list['1'].append(padded(path_1))
                x_p_list['2'].append(padded(path_2))
                x_p_list['3'].append(padded(path_3))
                neg_r_list = list(ini_r_id_set.difference(set(ini_r_list)))
                x_r_list.append([random.choice(neg_r_list)])
                y_list.append(0.)

        if count % 100 == 0:
            print('generating big-batches for path-based model', count, len(existing_ids))

### Build the big-batch for the subgraph-based network training

Again, to reduce computational complexity, we store the subgraph of each entity e at the beginning.

* At each step, we will select one triple (s,r,t) from the dataset. Then, the out-reaching paths of s and t are generated, respectively, according to their out-going relations.
* We will select three paths for each of the source and target entities, and add them to the corresponding lists.
* If this is a positive sample step, the id of relation r is appended to the relation list.
* If this is a negative sample step, the id of a random relation is appended to the relation list.
* Similarly, one negative sample step always follows one positive step. The paths from the previous positive step are used again for the negative step.

In [16]:
#Running the path-finding algorithm again and again during training is too slow.
#Instead, the out-reaching paths (the "subgraph") of each entity are searched once and stored,
#so the subgraph-based training can reuse them many times.
def store_subgraph_dicts(lower_bd, upper_bd, data, one_hop, s_t_r,
                         relation2id, entity2id, id2relation, id2entity):
    """Return a dict mapping every entity in one_hop to a list of up to 100 of its paths.

    For each entity s the module-level Class_2 searches paths from s to any target; the
    distinct paths (tuples of relation IDs) are pooled over all targets and at most 100 of
    them are drawn at random. data, s_t_r, relation2id, entity2id and id2entity are
    accepted but not read.

    Raises:
        ValueError: if the IDs in id2relation are not exactly 0..len(id2relation)-1.
    """
    #relation IDs must be contiguous, starting at 0
    for rel_id in range(len(id2relation)):
        if rel_id not in id2relation:
            raise ValueError('error when generaing id2relation')

    #the entities from which the path search is started, in random order
    existing_ids = list({entity for entity in one_hop})
    random.shuffle(existing_ids)

    #Dict stores the subgraph for each entity
    Dict_1 = dict()

    for count, s in enumerate(existing_ids, start=1):

        result, _ = Class_2.obtain_paths('any_target', s, 'any', lower_bd, upper_bd, one_hop)

        #pool the distinct paths over all reached targets
        path_list = list({path for paths in result.values() for path in paths})

        sample_size = min(len(path_list), 100)
        Dict_1[s] = deepcopy(random.sample(path_list, sample_size))

        if count % 100 == 0:
            print('generating and storing paths for the path-based model', count, len(existing_ids))

    return(Dict_1)

In [17]:
#function to build the big-batch for the subgraph-based model training
def build_big_batches_subgraph(lower_bd, upper_bd, data, one_hop, s_t_r,
                      x_s_list, x_t_list, x_r_list, y_list, Dict,
                      relation2id, entity2id, id2relation, id2entity):
    """Append positive/negative training samples for the subgraph-based model.

    An entity is "qualified" when Dict holds at least three paths for it. Ten times over,
    the triples in data are shuffled and every triple (s, r, t) whose two entities are
    qualified yields three positive/negative pairs, in this order:

      1. true paths + r (label 1.), then the same paths + a random other forward relation (0.)
      2. freshly drawn true paths + r (1.), then source paths of a random qualified entity
         with the same target paths + r (0.)
      3. freshly drawn true paths + r (1.), then the same source paths with target paths of
         a random qualified entity + r (0.)

    Results are appended in place to x_s_list / x_t_list (keys '1', '2', '3'; paths are
    right-padded with the ID len(id2relation) up to upper_bd), x_r_list ([relation]) and
    y_list. Nothing is returned. lower_bd, one_hop, s_t_r, relation2id, entity2id and
    id2entity are accepted but not read.

    NOTE(review): the random entities are drawn from all qualified entities, so they may
    coincide with the true s or t -- confirm that such false negatives are acceptable.

    Raises:
        ValueError: if the IDs in id2relation are not exactly 0..len-1 in initial/inverse pairs.
    """
    num_r = len(id2relation)

    #relation IDs must be contiguous, starting at 0
    for i in range(num_r):
        if i not in id2relation:
            raise ValueError('error when generaing id2relation')

    #initial relation id is always an even number
    ini_r_id_set = {i for i in range(num_r) if i % 2 == 0}
    num_ini_r = len(ini_r_id_set)

    if num_ini_r != int(num_r/2):
        raise ValueError('error when generating id2relation')

    #if an entity has at least three out-stretching paths, it is a qualified one.
    #The set gives O(1) membership tests; the list is what random.choice draws from.
    qualified_set = {e for e in Dict if len(Dict[e]) >= 3}
    qualified = list(qualified_set)

    channels = ('1', '2', '3')

    #right-pad a path with the space holder id up to upper_bd
    def pad(path):
        return list(path) + [num_r]*abs(len(path)-upper_bd)

    #append one sample (three source paths, three target paths, relation, label)
    def append_sample(s_paths, t_paths, relation, label):
        for key, path in zip(channels, s_paths):
            x_s_list[key].append(pad(path))
        for key, path in zip(channels, t_paths):
            x_t_list[key].append(pad(path))
        x_r_list.append([relation])
        y_list.append(label)

    data = list(data)

    for iteration in range(10):

        data = shuffle(data)

        for i_0, triple in enumerate(data):

            s, r, t = triple[0], triple[1], triple[2] #obtain entities and relation IDs

            if s in qualified_set and t in qualified_set:

                #obtain the path list for true entities
                path_s, path_t = list(Dict[s]), list(Dict[t])

                #####pair 1: true triple vs. wrong relation###########
                s_paths = random.sample(path_s, 3)
                t_paths = random.sample(path_t, 3)
                append_sample(s_paths, t_paths, r, 1.)

                neg_r_list = list(ini_r_id_set.difference({r}))
                r_ran = random.choice(neg_r_list)
                append_sample(s_paths, t_paths, r_ran, 0.)

                #randomly choose two negative sampled entities and get their path lists
                s_ran = random.choice(qualified)
                t_ran = random.choice(qualified)
                path_s_ran, path_t_ran = list(Dict[s_ran]), list(Dict[t_ran])

                #####pair 2: true triple vs. wrong source entity###########
                s_paths = random.sample(path_s, 3)
                t_paths = random.sample(path_t, 3)
                append_sample(s_paths, t_paths, r, 1.)

                s_paths_ran = random.sample(path_s_ran, 3)
                append_sample(s_paths_ran, t_paths, r, 0.)

                #####pair 3: true triple vs. wrong target entity###########
                s_paths = random.sample(path_s, 3)
                t_paths = random.sample(path_t, 3)
                append_sample(s_paths, t_paths, r, 1.)

                t_paths_ran = random.sample(path_t_ran, 3)
                append_sample(s_paths, t_paths_ran, r, 0.)

            if i_0 % 200 == 0:
                print('generating big-batches for subgraph-based model', i_0, len(data), iteration)

### Start Training: load the KG and call classes

Here, we use the validation set to see the training efficiency. That is, we use the validation to check whether the true relation between entities can be predicted by paths.

The trick is: in validation, we have to use the same relation ID and entity ID as in the training. But we don't want to use the links in training anymore. That is, in validation, we want to use (and update if necessary) entity2id, id2entity, relation2id and id2relation. But we want to use new one_hop, data, data_ and s_t_r for validation set. Then, path-finding will also be based on new one_hop.


In [18]:
model_name

'Model_SiaLP_3_new_WN18RR_v4'

In [19]:
one_hop_model_name

'One_hop_model_SiaLP_3_new_WN18RR_v4'

In [20]:
ids_name

'IDs_SiaLP_3_new_WN18RR_v4'

In [21]:
#first, we save the relation and ids
#everything is bundled into one dictionary and pickled under ids_name, so that
#the exact ids and graph structures used here can be restored from that file
Dict = {
    #training data
    'one_hop': one_hop,
    'data': data,
    's_t_r': s_t_r,

    #valid data
    'one_hop_valid': one_hop_valid,
    'data_valid': data_valid,
    's_t_r_valid': s_t_r_valid,

    #test data
    'one_hop_test': one_hop_test,
    'data_test': data_test,
    's_t_r_test': s_t_r_test,

    #shared dictionaries
    'entity2id': entity2id,
    'id2entity': id2entity,
    'relation2id': relation2id,
    'id2relation': id2relation,
}

#the output directory may not exist yet when this cell runs (the model-saving
#code creates it, but only afterwards), so make sure it is there before writing
os.makedirs('../weight_bin/', exist_ok=True)

with open('../weight_bin/' + ids_name + '.pickle', 'wb') as handle:
    pickle.dump(Dict, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [22]:
###train the path-based model
lower_bd, upper_bd = lower_bound, upper_bound_path
num_epoch, batch_size = 10, 32

#containers handed to build_big_batches_path:
#three path slots ('1', '2', '3'), the relation ids and the labels
train_p_list, train_r_list, train_y_list = {'1': [], '2': [], '3': []}, [], []
valid_p_list, valid_r_list, valid_y_list = {'1': [], '2': [], '3': []}, [], []

#######################################
###build the big-batches###############

#training big-batch
build_big_batches_path(lower_bd, upper_bd, data, one_hop, s_t_r,
                       train_p_list, train_r_list, train_y_list,
                       relation2id, entity2id, id2relation, id2entity)

#validation big-batch
build_big_batches_path(lower_bd, upper_bd, data_valid, one_hop_valid, s_t_r_valid,
                       valid_p_list, valid_r_list, valid_y_list,
                       relation2id, entity2id, id2relation, id2entity)

#######################################
###do the training#####################
#the validation graph can be too small or too sparse to provide three paths
#between a pair of s and t. With fewer than 100 validation samples we therefore
#cut the training big-batch itself: first 80% for training, the rest for validation
if len(valid_y_list) < 100:
    split = int(len(train_y_list)*0.8)

    #training arrays: everything before the split point
    x_train_1, x_train_2, x_train_3 = (
        np.asarray(train_p_list[slot][:split], dtype='int') for slot in ('1', '2', '3'))
    x_train_r = np.asarray(train_r_list[:split], dtype='int')
    y_train = np.asarray(train_y_list[:split], dtype='int')

    #validation arrays: everything from the split point on
    x_valid_1, x_valid_2, x_valid_3 = (
        np.asarray(train_p_list[slot][split:], dtype='int') for slot in ('1', '2', '3'))
    x_valid_r = np.asarray(train_r_list[split:], dtype='int')
    y_valid = np.asarray(train_y_list[split:], dtype='int')

else:
    #training arrays: the whole training big-batch
    x_train_1, x_train_2, x_train_3 = (
        np.asarray(train_p_list[slot], dtype='int') for slot in ('1', '2', '3'))
    x_train_r = np.asarray(train_r_list, dtype='int')
    y_train = np.asarray(train_y_list, dtype='int')

    #validation arrays: the whole validation big-batch
    x_valid_1, x_valid_2, x_valid_3 = (
        np.asarray(valid_p_list[slot], dtype='int') for slot in ('1', '2', '3'))
    x_valid_r = np.asarray(valid_r_list, dtype='int')
    y_valid = np.asarray(valid_y_list, dtype='int')

#do the training
train_inputs = [x_train_1, x_train_2, x_train_3, x_train_r]
valid_inputs = [x_valid_1, x_valid_2, x_valid_3, x_valid_r]
model.fit(train_inputs, y_train,
          validation_data=(valid_inputs, y_valid),
          batch_size=batch_size, epochs=num_epoch)

# Save model and weights (the target directory is created when missing)
save_dir = os.path.join(os.getcwd(), '../weight_bin')
os.makedirs(save_dir, exist_ok=True)

add_h5 = model_name + '.h5'
model_path = os.path.join(save_dir, add_h5)
model.save(model_path)
print('Save model')
del model

generating big-batches for path-based model 100 3861
generating big-batches for path-based model 200 3861
generating big-batches for path-based model 300 3861
generating big-batches for path-based model 400 3861
generating big-batches for path-based model 500 3861
generating big-batches for path-based model 600 3861
generating big-batches for path-based model 700 3861
generating big-batches for path-based model 800 3861
generating big-batches for path-based model 900 3861
generating big-batches for path-based model 1000 3861
generating big-batches for path-based model 1100 3861
generating big-batches for path-based model 1200 3861
generating big-batches for path-based model 1300 3861
generating big-batches for path-based model 1400 3861
generating big-batches for path-based model 1500 3861
generating big-batches for path-based model 1600 3861
generating big-batches for path-based model 1700 3861
generating big-batches for path-based model 1800 3861
generating big-batches for path-based

In [23]:
###train the subgraph-based model
lower_bd, upper_bd = lower_bound, upper_bound_subg
num_epoch, batch_size = 10, 32

#subgraph dictionaries for the training and the validation graph
Dict_train = store_subgraph_dicts(lower_bd, upper_bd, data, one_hop, s_t_r,
                                  relation2id, entity2id, id2relation, id2entity)

Dict_valid = store_subgraph_dicts(lower_bd, upper_bd, data_valid, one_hop_valid, s_t_r_valid,
                                  relation2id, entity2id, id2relation, id2entity)

#containers handed to build_big_batches_subgraph: three path slots for the
#source side, three for the target side, the relation ids and the labels
train_s_list, train_t_list = {'1': [], '2': [], '3': []}, {'1': [], '2': [], '3': []}
train_r_list, train_y_list = [], []

valid_s_list, valid_t_list = {'1': [], '2': [], '3': []}, {'1': [], '2': [], '3': []}
valid_r_list, valid_y_list = [], []

#######################################
###build the big-batches###############

#training big-batch
build_big_batches_subgraph(lower_bd, upper_bd, data, one_hop, s_t_r,
                           train_s_list, train_t_list, train_r_list, train_y_list, Dict_train,
                           relation2id, entity2id, id2relation, id2entity)

#validation big-batch
build_big_batches_subgraph(lower_bd, upper_bd, data_valid, one_hop_valid, s_t_r_valid,
                           valid_s_list, valid_t_list, valid_r_list, valid_y_list, Dict_valid,
                           relation2id, entity2id, id2relation, id2entity)

#######################################
###do the training#####################
#the validation graph can be too small or too sparse to provide enough samples.
#With fewer than 100 validation samples we therefore cut the training big-batch
#itself: first 80% for training, the rest for validation
if len(valid_y_list) < 100:
    split = int(len(train_y_list)*0.8)

    #training arrays: everything before the split point
    x_train_s_1, x_train_s_2, x_train_s_3 = (
        np.asarray(train_s_list[slot][:split], dtype='int') for slot in ('1', '2', '3'))
    x_train_t_1, x_train_t_2, x_train_t_3 = (
        np.asarray(train_t_list[slot][:split], dtype='int') for slot in ('1', '2', '3'))
    x_train_r = np.asarray(train_r_list[:split], dtype='int')
    y_train = np.asarray(train_y_list[:split], dtype='int')

    #validation arrays: everything from the split point on
    x_valid_s_1, x_valid_s_2, x_valid_s_3 = (
        np.asarray(train_s_list[slot][split:], dtype='int') for slot in ('1', '2', '3'))
    x_valid_t_1, x_valid_t_2, x_valid_t_3 = (
        np.asarray(train_t_list[slot][split:], dtype='int') for slot in ('1', '2', '3'))
    x_valid_r = np.asarray(train_r_list[split:], dtype='int')
    y_valid = np.asarray(train_y_list[split:], dtype='int')

else:
    #training arrays: the whole training big-batch
    x_train_s_1, x_train_s_2, x_train_s_3 = (
        np.asarray(train_s_list[slot], dtype='int') for slot in ('1', '2', '3'))
    x_train_t_1, x_train_t_2, x_train_t_3 = (
        np.asarray(train_t_list[slot], dtype='int') for slot in ('1', '2', '3'))
    x_train_r = np.asarray(train_r_list, dtype='int')
    y_train = np.asarray(train_y_list, dtype='int')

    #validation arrays: the whole validation big-batch
    x_valid_s_1, x_valid_s_2, x_valid_s_3 = (
        np.asarray(valid_s_list[slot], dtype='int') for slot in ('1', '2', '3'))
    x_valid_t_1, x_valid_t_2, x_valid_t_3 = (
        np.asarray(valid_t_list[slot], dtype='int') for slot in ('1', '2', '3'))
    x_valid_r = np.asarray(valid_r_list, dtype='int')
    y_valid = np.asarray(valid_y_list, dtype='int')

#do the training
train_inputs = [x_train_s_1, x_train_s_2, x_train_s_3,
                x_train_t_1, x_train_t_2, x_train_t_3, x_train_r]
valid_inputs = [x_valid_s_1, x_valid_s_2, x_valid_s_3,
                x_valid_t_1, x_valid_t_2, x_valid_t_3, x_valid_r]
model_2.fit(train_inputs, y_train,
            validation_data=(valid_inputs, y_valid),
            batch_size=batch_size, epochs=num_epoch)

# Save model and weights (the target directory is created when missing)
one_hop_save_dir = os.path.join(os.getcwd(), '../weight_bin')
os.makedirs(one_hop_save_dir, exist_ok=True)

one_hop_add_h5 = one_hop_model_name + '.h5'
one_hop_model_path = os.path.join(one_hop_save_dir, one_hop_add_h5)
model_2.save(one_hop_model_path)
print('Save model')
del model_2, Dict_train, Dict_valid

generating and storing paths for the path-based model 100 3861
generating and storing paths for the path-based model 200 3861
generating and storing paths for the path-based model 300 3861
generating and storing paths for the path-based model 400 3861
generating and storing paths for the path-based model 500 3861
generating and storing paths for the path-based model 600 3861
generating and storing paths for the path-based model 700 3861
generating and storing paths for the path-based model 800 3861
generating and storing paths for the path-based model 900 3861
generating and storing paths for the path-based model 1000 3861
generating and storing paths for the path-based model 1100 3861
generating and storing paths for the path-based model 1200 3861
generating and storing paths for the path-based model 1300 3861
generating and storing paths for the path-based model 1400 3861
generating and storing paths for the path-based model 1500 3861
generating and storing paths for the path-based m

generating big-batches for subgraph-based model 1400 7940 2
generating big-batches for subgraph-based model 1600 7940 2
generating big-batches for subgraph-based model 1800 7940 2
generating big-batches for subgraph-based model 2000 7940 2
generating big-batches for subgraph-based model 2200 7940 2
generating big-batches for subgraph-based model 2400 7940 2
generating big-batches for subgraph-based model 2600 7940 2
generating big-batches for subgraph-based model 2800 7940 2
generating big-batches for subgraph-based model 3000 7940 2
generating big-batches for subgraph-based model 3200 7940 2
generating big-batches for subgraph-based model 3400 7940 2
generating big-batches for subgraph-based model 3600 7940 2
generating big-batches for subgraph-based model 3800 7940 2
generating big-batches for subgraph-based model 4000 7940 2
generating big-batches for subgraph-based model 4200 7940 2
generating big-batches for subgraph-based model 4400 7940 2
generating big-batches for subgraph-base

generating big-batches for subgraph-based model 200 7940 6
generating big-batches for subgraph-based model 400 7940 6
generating big-batches for subgraph-based model 600 7940 6
generating big-batches for subgraph-based model 800 7940 6
generating big-batches for subgraph-based model 1000 7940 6
generating big-batches for subgraph-based model 1200 7940 6
generating big-batches for subgraph-based model 1400 7940 6
generating big-batches for subgraph-based model 1600 7940 6
generating big-batches for subgraph-based model 1800 7940 6
generating big-batches for subgraph-based model 2000 7940 6
generating big-batches for subgraph-based model 2200 7940 6
generating big-batches for subgraph-based model 2400 7940 6
generating big-batches for subgraph-based model 2600 7940 6
generating big-batches for subgraph-based model 2800 7940 6
generating big-batches for subgraph-based model 3000 7940 6
generating big-batches for subgraph-based model 3200 7940 6
generating big-batches for subgraph-based mo

generating big-batches for subgraph-based model 4000 7940 9
generating big-batches for subgraph-based model 4200 7940 9
generating big-batches for subgraph-based model 4400 7940 9
generating big-batches for subgraph-based model 4600 7940 9
generating big-batches for subgraph-based model 4800 7940 9
generating big-batches for subgraph-based model 5000 7940 9
generating big-batches for subgraph-based model 5200 7940 9
generating big-batches for subgraph-based model 5400 7940 9
generating big-batches for subgraph-based model 5600 7940 9
generating big-batches for subgraph-based model 5800 7940 9
generating big-batches for subgraph-based model 6000 7940 9
generating big-batches for subgraph-based model 6200 7940 9
generating big-batches for subgraph-based model 6400 7940 9
generating big-batches for subgraph-based model 6600 7940 9
generating big-batches for subgraph-based model 6800 7940 9
generating big-batches for subgraph-based model 7000 7940 9
generating big-batches for subgraph-base

### Result on the testset for inductive link prediction

We use the testset for inductive link prediction.

In [1]:
#dataset and model identifiers; they are combined into the names of the saved files
data_name = 'WN18RR_v4'
model_id = 'SiaLP_3_new'
#bounds on the path length: in the training cells lower_bound is used for both models,
#upper_bound_path for the path-based model and upper_bound_subg for the subgraph-based model
lower_bound = 1
upper_bound_path = 10
upper_bound_subg = 3

In [2]:
#define the names for saving: each is a prefix followed by the model id and the dataset name
model_name = f'Model_{model_id}_{data_name}'
one_hop_model_name = f'One_hop_model_{model_id}_{data_name}'
ids_name = f'IDs_{model_id}_{data_name}'

In [3]:
ids_name

'IDs_SiaLP_3_new_WN18RR_v4'

In [4]:
one_hop_model_name

'One_hop_model_SiaLP_3_new_WN18RR_v4'

In [5]:
model_name

'Model_SiaLP_3_new_WN18RR_v4'

In [6]:
import librosa
import opensmile
import os
import sys
import numpy as np
import random
import pickle

from collections import defaultdict
from copy import deepcopy
from sklearn.utils import shuffle

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import Model
from tensorflow.keras import initializers
from tensorflow.keras.utils import plot_model

In [7]:
class LoadKG:
    """Read triple files and merge them into id-based graph containers."""

    def __init__(self):

        self.x = 'Hello'

    def load_train_data(self, data_path, one_hop, data, s_t_r, entity2id, id2entity,
                     relation2id, id2relation):
        """Load the triples of one file into the given containers, in place.

        Each non-empty line of the file is split on whitespace and its first
        three fields are read as ``source relation target``. Nothing is
        returned; all results are written into the arguments.

        Parameters:
            data_path: path of the text file to read.
            one_hop: dict mapping an entity id to its outgoing ``(r, t)`` pairs.
                Inverse edges are added as well. On return every value is a list.
            data: set receiving the ``(s, r, t)`` id triples of the file
                (forward direction only).
            s_t_r: dict mapping ``(s, t)`` to the set of relation ids from s to t,
                inverse direction included. A plain dict works as well as a
                ``defaultdict(set)``.
            entity2id, id2entity: entity name <-> id dictionaries, extended with
                the entities not seen before.
            relation2id, id2relation: relation name <-> id dictionaries, extended
                with the relations not seen before. Every new relation gets two
                consecutive ids: one for itself and the next one for
                ``'_inverse_' + relation``.

        Raises:
            ValueError: if a non-empty line has fewer than three fields.

        Notes:
            * New ids are handed out in order of first appearance in the file,
              so the same file always produces the same ids.
            * The id of an inverse relation is computed by flipping the lowest
              bit of the relation id. This relies on forward relations having
              even ids, which holds as long as relation2id is only ever filled
              by this method (starting from an empty dictionary).
        """

        #neighbour collections left as lists by an earlier call are turned back
        #into sets, so that the same one_hop can be extended by several files
        for e in one_hop:

            one_hop[e] = set(one_hop[e])

        ####load the triples, de-duplicated in file order##########
        #a dict is used as an ordered set: keys keep their insertion order
        triples = dict()

        with open (data_path, 'r') as f:

            for line_no, line in enumerate(f, start=1):

                fields = line.split()

                #blank lines (e.g. a trailing empty line) carry no triple
                if not fields:

                    continue

                if len(fields) < 3:

                    raise ValueError(
                        'line %d of %s is not a "source relation target" triple: %r'
                        % (line_no, data_path, line))

                triples[tuple(fields[:3])] = None

        ####relation dict#################
        index = len(relation2id)

        for _, relation, _ in triples:

            if relation not in relation2id:

                relation2id[relation] = index

                id2relation[index] = relation

                index += 1

                #the inverse relation takes the id right after its forward relation
                iv_r = '_inverse_' + relation

                relation2id[iv_r] = index

                id2relation[index] = iv_r

                index += 1

        ####entity dict###################
        index = len(entity2id)

        for source, _, target in triples:

            for entity in (source, target):

                if entity not in entity2id:

                    entity2id[entity] = index

                    id2entity[index] = entity

                    index += 1

        #create the set of triples using id instead of string
        for source, relation, target in triples:

            s = entity2id[source]

            r = relation2id[relation]

            t = entity2id[target]

            #forward relations have even ids and their inverse the next odd id,
            #so flipping the lowest bit switches between the two
            r_inv = r ^ 1

            data.add((s,r,t))

            #forward edge s -> t
            s_t_r.setdefault((s,t), set()).add(r)

            one_hop.setdefault(s, set()).add((r,t))

            #inverse edge t -> s
            s_t_r.setdefault((t,s), set()).add(r_inv)

            one_hop.setdefault(t, set()).add((r_inv,s))

        #change each set in one_hop to list
        for e in one_hop:

            one_hop[e] = list(one_hop[e])

In [8]:
class ObtainPathsByDynamicProgramming:
    """Collect relation paths starting at an entity by a bounded, randomised recursion."""

    def __init__(self, amount_bd=50, size_bd=50, threshold=20000):

        self.amount_bd = amount_bd #how many Tuples we choose in one_hop[node] for next recursion

        self.size_bd = size_bd #size bound limit the number of paths to a target entity t

        #number of times paths with specific length been performed for recursion
        self.threshold = threshold

    def obtain_paths(self, mode, s, t_input, lower_bd, upper_bd, one_hop):
        """Find relation paths from entity s to other entities, using recursion.

        One may refer to LeetCode Problem 797 for details:
            https://leetcode.com/problems/all-paths-from-source-to-target/

        Starting at s, the search repeatedly moves to a neighbour that is not
        yet on the current path. A path is a tuple of the relation ids along
        the way. The neighbours of each node are visited in random order
        (``random.shuffle``), and at most ``amount_bd`` of them are followed.

        Parameters:
            mode: which entities may receive paths:
                'direct_neighbour' - only the direct neighbours of s;
                'target_specified' - only the entity t_input;
                'any_target' - every entity that is a key of one_hop.
            s: the start entity; must be a key of one_hop.
            t_input: the target entity; only read in 'target_specified' mode.
            lower_bd, upper_bd: minimum and maximum length (number of relations)
                of a stored path; ints with 1 <= lower_bd <= upper_bd.
            one_hop: dict mapping an entity to an indexable sequence of
                (relation, neighbour) tuples.

        Returns:
            (res, count_dict): res maps each reached, qualified entity to the
            set of paths found from s to it (at most ``size_bd`` per entity);
            count_dict maps a path length to the number of neighbour tuples
            that were considered while extending paths of that length.

        Raises:
            TypeError: if a bound is not an int, is smaller than 1, or if
                lower_bd exceeds upper_bd.
            ValueError: if s is not in one_hop or mode is not a valid mode.
        """

        if not isinstance(lower_bd, int) or lower_bd < 1:

            raise TypeError("!!! invalid lower bound setting, must >= 1 !!!")

        if not isinstance(upper_bd, int) or upper_bd < 1:

            raise TypeError("!!! invalid upper bound setting, must >= 1 !!!")

        if lower_bd > upper_bd:

            raise TypeError("!!! lower bound must not exced upper bound !!!")

        if s not in one_hop:

            raise ValueError('!!! entity not in one_hop. Please work on existing entities')

        #qualified_t contains the entities we want to consider,
        #that is, the only entities whose paths are added to the result
        if mode == 'direct_neighbour':

            qualified_t = {Tuple[1] for Tuple in one_hop[s]}

        elif mode == 'target_specified':

            qualified_t = {t_input}

        elif mode == 'any_target':

            qualified_t = set(one_hop)

        else:

            raise ValueError('not a valid mode')

        #here is the result dict. Its key is each entity t sharing paths from s
        #The value of each t is a set containing the paths from s to t
        #These paths can be either the direct connection r, or a multi-hop path
        res = defaultdict(set)

        #per path length: how many neighbour tuples have been considered so far
        count_dict = defaultdict(int)

        #On the current node with the path [r1, ..., rk] and on-path entities
        #{s, e1, ..., ek-1, node}, we look at the direct neighbours t' of the node.
        #If t' is not an on-path entity, we recursively proceed to t'
        def helper(node, path, on_path_en):

            #store the path when its length is within the bounds, the node is
            #qualified and the node has not yet collected size_bd paths
            if (lower_bd <= len(path) <= upper_bd) and (
                node in qualified_t) and (len(res[node]) < self.size_bd):

                res[node].add(tuple(path))

            #won't start new recursions if the current path length already reaches upper limit
            #or the number of recursions performed on this length has reached the limit
            if (len(path) >= upper_bd) or (count_dict[len(path)] > self.threshold):

                return

            #visit the neighbours in random order, following at most amount_bd of them
            order = list(range(len(one_hop[node])))
            random.shuffle(order)

            for i in order[:self.amount_bd]:

                #obtain tuple of (r,t)
                Tuple = one_hop[node][i]
                r, t = Tuple[0], Tuple[1]

                #add to count_dict even if eventually this step not proceed
                count_dict[len(path)] += 1

                #if t not on the path and we not exceed the computation threshold,
                #then finally proceed to next recursion
                if (t not in on_path_en) and (count_dict[len(path)] <= self.threshold):

                    helper(t, path + [r], on_path_en | {t})

        helper(s, [], {s})

        return(res, count_dict)

In [9]:
#load the classes
#Class_1 reads triple files into the graph containers, Class_2 searches relation paths
Class_1 = LoadKG()
Class_2 = ObtainPathsByDynamicProgramming()

In [10]:
#load ids and relation/entity dicts
#NOTE: pickle.load can run arbitrary code, so only open a file you produced yourself
with open('../weight_bin/' + ids_name + '.pickle', 'rb') as handle:
    Dict = pickle.load(handle)

#restore training data
one_hop, data, s_t_r = Dict['one_hop'], Dict['data'], Dict['s_t_r']

#restore valid data
one_hop_valid, data_valid, s_t_r_valid = (
    Dict['one_hop_valid'], Dict['data_valid'], Dict['s_t_r_valid'])

#restore test data
one_hop_test, data_test, s_t_r_test = (
    Dict['one_hop_test'], Dict['data_test'], Dict['s_t_r_test'])

#restore shared dictionaries
entity2id, id2entity = Dict['entity2id'], Dict['id2entity']
relation2id, id2relation = Dict['relation2id'], Dict['id2relation']

#we want to keep the initial entity/relation dicts before adding new entities,
#so independent copies are taken right after loading
entity2id_ini, id2entity_ini = deepcopy(entity2id), deepcopy(id2entity)
relation2id_ini, id2relation_ini = deepcopy(relation2id), deepcopy(id2relation)

#number of relation ids in the loaded dictionary
num_r = len(id2relation)
num_r

18

In [11]:
ids_name

'IDs_SiaLP_3_new_WN18RR_v4'

In [12]:
model_name

'Model_SiaLP_3_new_WN18RR_v4'

In [13]:
#load the path-based model from '../weight_bin/' (file name: model_name + '.h5')
model = keras.models.load_model('../weight_bin/' + model_name + '.h5')

2023-05-15 19:20:35.202344: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [14]:
#load the one-hop neighbor model from '../weight_bin/' (file name: one_hop_model_name + '.h5')
model_2 = keras.models.load_model('../weight_bin/' + one_hop_model_name + '.h5')

In [15]:
#train/valid/test files of the inductive split, located in '../data/<data_name>_ind/'
ind_train_path = f'../data/{data_name}_ind/train.txt'
ind_valid_path = f'../data/{data_name}_ind/valid.txt'
ind_test_path = f'../data/{data_name}_ind/test.txt'

In [16]:
#load the train file of the inductive split (ind_train_path)
one_hop_ind, data_ind, s_t_r_ind = {}, set(), defaultdict(set)

#sizes of the relation and entity dictionaries before loading
len_0, size_0 = len(relation2id), len(entity2id)

#fill in the sets and dicts
Class_1.load_train_data(ind_train_path,
                        one_hop_ind, data_ind, s_t_r_ind,
                        entity2id, id2entity, relation2id, id2relation)

#sizes of the relation and entity dictionaries after loading
len_1, size_1 = len(relation2id), len(entity2id)

#loading must not have added any relation to the dictionary
if len_1 != len_0:
    raise ValueError('unseen relation!')

In [17]:
print(size_0, size_1, len(data_ind))

3861 10945 12334


In [18]:
#load the test file of the inductive split (ind_test_path)
one_hop_ind_test, data_ind_test, s_t_r_ind_test = {}, set(), defaultdict(set)

#sizes of the relation and entity dictionaries before loading
len_0, size_0 = len(relation2id), len(entity2id)

#fill in the sets and dicts
Class_1.load_train_data(ind_test_path,
                        one_hop_ind_test, data_ind_test, s_t_r_ind_test,
                        entity2id, id2entity, relation2id, id2relation)

#sizes of the relation and entity dictionaries after loading
len_1, size_1 = len(relation2id), len(entity2id)

#loading must not have added any relation to the dictionary
if len_1 != len_0:
    raise ValueError('unseen relation!')

In [19]:
print(size_0, size_1, len(data_ind_test))

10945 10945 1429


In [20]:
#load the validation file of the inductive split (ind_valid_path),
#used for existing triple removal when ranking
one_hop_ind_valid, data_ind_valid, s_t_r_ind_valid = {}, set(), defaultdict(set)

#sizes of the relation and entity dictionaries before loading
len_0, size_0 = len(relation2id), len(entity2id)

#fill in the sets and dicts
Class_1.load_train_data(ind_valid_path,
                        one_hop_ind_valid, data_ind_valid, s_t_r_ind_valid,
                        entity2id, id2entity, relation2id, id2relation)

#sizes of the relation and entity dictionaries after loading
len_1, size_1 = len(relation2id), len(entity2id)

#loading must not have added any relation to the dictionary
if len_1 != len_0:
    raise ValueError('unseen relation!')

In [21]:
print(size_0, size_1, len(data_ind_valid))

10945 10945 1394


In [22]:
print(len(entity2id), len(entity2id_ini))

10945 3861


In [23]:
#obtain all the initial entities and new entities:
#an entity id is "initial" when it is already in id2entity_ini, otherwise "new"
all_ent_set = set(id2entity)
ini_ent_set = {ent_id for ent_id in all_ent_set if ent_id in id2entity_ini}
new_ent_set = all_ent_set - ini_ent_set

print(len(ini_ent_set), len(new_ent_set), len(all_ent_set))

3861 7084 10945


In [24]:
#count the inductive test triples whose source or target entity
#already exists in the initial entity dictionary (id2entity_ini)
overlapping = sum(1 for ele in data_ind_test
                  if ele[0] in id2entity_ini or ele[2] in id2entity_ini)

overlapping

0

In [25]:
#count the inductive validation triples whose source or target entity
#already exists in the initial entity dictionary (id2entity_ini)
overlapping = sum(1 for ele in data_ind_valid
                  if ele[0] in id2entity_ini or ele[2] in id2entity_ini)

overlapping

0

In [26]:
#count the inductive train triples (data_ind) whose source or target entity
#already exists in the initial entity dictionary (id2entity_ini)
overlapping = sum(1 for ele in data_ind
                  if ele[0] in id2entity_ini or ele[2] in id2entity_ini)

overlapping

0

In [27]:
#the function to do path-based relation scoring
def path_based_relation_scoring(s, t, lower_bd, upper_bd, one_hop, id2relation, model):
    """Score every forward relation for the pair (s, t) from the paths between them.

    Paths from s to t are collected with three runs of
    ``Class_2.obtain_paths`` in 'target_specified' mode. If at least three
    distinct paths exist, ten rounds are performed: each round draws three
    paths at random and lets the model score them against every even
    relation id (the forward relations). The scores of the rounds are averaged.

    The function relies on two module-level names: ``Class_2`` (the path
    finder) and ``num_r`` (the id used to pad paths up to length upper_bd).

    Parameters:
        s, t: source and target entity ids.
        lower_bd, upper_bd: bounds on the path length handed to the path search;
            upper_bd is also the length every path is padded to.
        one_hop: dict of entity -> list of (relation, neighbour) tuples.
        id2relation: relation id -> name dictionary; its keys must be 0..n-1.
        model: object whose ``predict([p1, p2, p3, r], verbose=0)`` returns one
            score per input row.

    Returns:
        A defaultdict(float) mapping each even relation id to its average
        score; empty when fewer than three paths were found.

    Raises:
        ValueError: if the keys of id2relation are not 0..n-1 (only checked
            when at least three paths were found).
    """

    def pad(path):
        #fill the path with the space holder id up to the fixed length
        return list(path) + [num_r]*abs(len(path)-upper_bd)

    path_holder = set()

    for _ in range(3):

        result, _ = Class_2.obtain_paths('target_specified',
                                         s, t, lower_bd, upper_bd, one_hop)
        if t in result:

            path_holder.update(result[t])

    path_holder = list(path_holder)
    random.shuffle(path_holder)

    score_dict = defaultdict(float)
    count_dict = defaultdict(int)

    if len(path_holder) >= 3:

        #only care about initial relations, i.e. the even ids; this list does
        #not depend on the sampled paths, so it is built once for all rounds
        forward_ids = list()

        for i in range(len(id2relation)):

            if i not in id2relation:

                raise ValueError ('error when generating id2relation')

            if i % 2 == 0:

                forward_ids.append(i)

        input_r = np.array([[i] for i in forward_ids])

        for _ in range(10):

            path_1, path_2, path_3 = random.sample(path_holder, 3)

            #every relation candidate is paired with the same three paths
            input_1 = np.array([pad(path_1) for _ in forward_ids])
            input_2 = np.array([pad(path_2) for _ in forward_ids])
            input_3 = np.array([pad(path_3) for _ in forward_ids])

            pred = model.predict([input_1, input_2, input_3, input_r], verbose = 0)

            for i in range(pred.shape[0]):
                #take the score as a true scalar: float() on a 1-element array
                #is deprecated in recent NumPy versions
                #need to times 2 to go back to relation id from pred position
                score_dict[2*i] += float(np.ravel(pred[i])[0])
                count_dict[2*i] += 1

    #average the score
    for r in score_dict:
        score_dict[r] = score_dict[r]/float(count_dict[r])

    print(len(score_dict), len(path_holder))

    return(score_dict)

In [28]:
#the function to do path-based triple scoring: input one triple
def path_based_triple_scoring(s, r, t, lower_bd, upper_bd, one_hop, id2relation, model):
    """Score the triple (s, r, t) from the paths between s and t.

    Paths from s to t are collected with three runs of
    ``Class_2.obtain_paths`` in 'target_specified' mode. If at least three
    distinct paths exist, ten samples of three random paths are built, each
    paired with relation r, and scored by the model in a single ``predict``
    call; the ten scores are averaged.

    The function relies on two module-level names: ``Class_2`` (the path
    finder) and ``num_r`` (the id used to pad paths up to length upper_bd).

    Parameters:
        s, r, t: source entity id, relation id and target entity id.
        lower_bd, upper_bd: bounds on the path length handed to the path search;
            upper_bd is also the length every path is padded to.
        one_hop: dict of entity -> list of (relation, neighbour) tuples.
        id2relation: not used by this function; kept in the signature for callers.
        model: object whose ``predict([p1, p2, p3, r], verbose=0)`` returns one
            score per input row.

    Returns:
        The average score as a float, or 0. when fewer than three paths were found.
    """

    def pad(path):
        #fill the path with the space holder id up to the fixed length
        return list(path) + [num_r]*abs(len(path)-upper_bd)

    path_holder = set()

    for _ in range(3):

        result, _ = Class_2.obtain_paths('target_specified',
                                         s, t, lower_bd, upper_bd, one_hop)
        if t in result:

            path_holder.update(result[t])

    path_holder = list(path_holder)
    random.shuffle(path_holder)

    score = 0.
    count = 0

    if len(path_holder) >= 3:

        list_1 = list()
        list_2 = list()
        list_3 = list()
        list_r = list()

        #build ten samples of three random paths, all paired with relation r
        while count < 10:

            path_1, path_2, path_3 = random.sample(path_holder, 3)

            list_1.append(pad(path_1))
            list_2.append(pad(path_2))
            list_3.append(pad(path_3))
            list_r.append([r])

            count += 1

        #change to arrays
        input_1 = np.array(list_1)
        input_2 = np.array(list_2)
        input_3 = np.array(list_3)
        input_r = np.array(list_r)

        pred = model.predict([input_1, input_2, input_3, input_r], verbose = 0)

        for i in range(pred.shape[0]):
            #take the score as a true scalar: float() on a 1-element array
            #is deprecated in recent NumPy versions
            score += float(np.ravel(pred[i])[0])

        #average the score
        score = score/float(count)

    return(score)

In [29]:
#subgraph based relation scoring
def subgraph_relation_scoring(s, t, lower_bd, upper_bd, one_hop, id2relation, model_2):
    """Score every forward relation for the entity pair (s, t) from subgraph paths.

    Paths leaving ``s`` and paths leaving ``t`` are gathered by calling
    ``Class_2.obtain_paths`` in 'any_target' mode three times per entity and
    pooling the distinct results. When both pools hold at least three paths,
    ten rounds are run: each round samples three paths per entity, pads them
    with the module-level ``num_r`` id, and asks ``model_2`` to score the same
    six paths against every even relation id (the odd ids are skipped as the
    inverse relations). Scores are averaged over the rounds.

    Args:
        s: source entity, forwarded to ``Class_2.obtain_paths``.
        t: target entity, forwarded to ``Class_2.obtain_paths``.
        lower_bd: lower path-length bound forwarded to ``obtain_paths``.
        upper_bd: upper path-length bound forwarded to ``obtain_paths``;
            also the length the paths are padded to.
        one_hop: neighbourhood structure forwarded to ``obtain_paths``.
        id2relation: mapping whose keys must be exactly
            ``0 .. len(id2relation) - 1``.
        model_2: object exposing a Keras-style ``predict(inputs, verbose=0)``.

    Returns:
        defaultdict(float): even relation id -> mean score. Empty when either
        entity has fewer than three distinct paths.

    Raises:
        ValueError: if some id in ``range(len(id2relation))`` is missing from
            ``id2relation`` (only checked when scoring actually takes place).
    """
    def pad(path):
        #add the space holder id at the end of the shorter path
        return list(path) + [num_r]*abs(len(path)-upper_bd)

    path_s, path_t = set(), set() #sets holding all the paths from s or t

    for _ in range(3):

        #obtain the paths out from s or t by "any target" mode
        result_s, _lengths_s = Class_2.obtain_paths('any_target', s, 'any', lower_bd, upper_bd, one_hop)
        result_t, _lengths_t = Class_2.obtain_paths('any_target', t, 'any', lower_bd, upper_bd, one_hop)

        #add paths to the source/target path_set
        for e in result_s:
            path_s.update(result_s[e])
        for e in result_t:
            path_t.update(result_t[e])

    #final output: the score dict
    score_dict = defaultdict(float)

    #see if both path_s and path_t have at least three paths
    if len(path_s) >= 3 and len(path_t) >= 3:

        #change to lists
        path_s, path_t = list(path_s), list(path_t)

        #the candidate relations do not change between rounds, so they are
        #validated and built once instead of once per round
        num_ids = len(id2relation)
        for i in range(num_ids):
            if i not in id2relation:
                raise ValueError ('error when generating id2relation')

        #all forward (initial) relations, i.e. the even ids
        candidates = list(range(0, num_ids, 2))
        input_r = np.array([[rel] for rel in candidates])

        num_rounds = 10
        for _ in range(num_rounds):

            #randomly obtain three paths per entity (s first, then t)
            sampled = random.sample(path_s, 3) + random.sample(path_t, 3)

            #each sampled path is repeated once per candidate relation
            inputs = [np.array([pad(path)]*len(candidates)) for path in sampled]
            inputs.append(input_r)

            pred = model_2.predict(inputs, verbose = 0)

            #row k of pred belongs to candidates[k]; .item() avoids the
            #deprecated float() conversion of an array with ndim > 0
            for rel, row in zip(candidates, pred):
                score_dict[rel] += row.item()

        #average the score: every relation was scored once per round
        for rel in score_dict:
            score_dict[rel] = score_dict[rel]/float(num_rounds)

    print(len(score_dict), len(path_s), len(path_t))

    return(score_dict)

In [30]:
#subgraph based triple scoring
def subgraph_triple_scoring(s, r, t, lower_bd, upper_bd, one_hop, id2relation, model_2):
    """Score a single triple (s, r, t) with the subgraph-based model.

    Paths leaving ``s`` and paths leaving ``t`` are gathered by calling
    ``Class_2.obtain_paths`` in 'any_target' mode three times per entity and
    pooling the distinct results. When both pools hold at least three paths,
    ten samples are built, each made of three paths from ``s`` and three from
    ``t`` padded with the module-level ``num_r`` id. The samples are sent to
    ``model_2.predict`` in one batch together with ``r`` and the mean
    prediction is the score.

    Args:
        s: source entity, forwarded to ``Class_2.obtain_paths``.
        r: relation id fed to the model as its last input.
        t: target entity, forwarded to ``Class_2.obtain_paths``.
        lower_bd: lower path-length bound forwarded to ``obtain_paths``.
        upper_bd: upper path-length bound forwarded to ``obtain_paths``;
            also the length the paths are padded to.
        one_hop: neighbourhood structure forwarded to ``obtain_paths``.
        id2relation: not used here; kept so the signature matches the
            relation-scoring functions.
        model_2: object exposing a Keras-style ``predict(inputs, verbose=0)``.

    Returns:
        float: mean prediction over the ten samples, or ``0.`` when either
        entity has fewer than three distinct paths.
    """
    num_samples = 10

    path_s, path_t = set(), set() #sets holding all the paths from s or t

    for _ in range(3):

        #obtain the paths out from s or t by "any target" mode
        result_s, _lengths_s = Class_2.obtain_paths('any_target', s, 'any', lower_bd, upper_bd, one_hop)
        result_t, _lengths_t = Class_2.obtain_paths('any_target', t, 'any', lower_bd, upper_bd, one_hop)

        #add paths to the source/target path_set
        for e in result_s:
            path_s.update(result_s[e])
        for e in result_t:
            path_t.update(result_t[e])

    #both path_s and path_t need at least three paths
    if len(path_s) < 3 or len(path_t) < 3:
        return 0.

    #change to lists
    path_s, path_t = list(path_s), list(path_t)

    #columns 0-2 hold the paths from s, columns 3-5 the paths from t
    columns = [[] for _ in range(6)]

    for _ in range(num_samples):

        #randomly obtain three paths per entity (s first, then t)
        sampled = random.sample(path_s, 3) + random.sample(path_t, 3)

        for column, path in zip(columns, sampled):
            #add the space holder id at the end of the shorter path
            column.append(list(path) + [num_r]*abs(len(path)-upper_bd))

    #change to arrays
    inputs = [np.array(column) for column in columns]
    inputs.append(np.array([[r]]*num_samples))

    pred = model_2.predict(inputs, verbose = 0)

    #.item() instead of float(row): converting an array with ndim > 0 to a
    #scalar is deprecated in recent NumPy releases
    score = 0.
    for row in pred:
        score += row.item()

    #average the score
    return score/float(num_samples)

#### Not fine-tuned

In [31]:
########################################################
#obtain the Hits@N for relation prediction##############
#
#For every triple in the inductive test set, all forward (even-id) relations
#are scored for the entity pair by summing the path-based and the
#subgraph-based scores, then ranked by descending score. Relations ranked
#above the true one that already form a known triple with the same entities
#are not counted against it (filtered rank).

#we select all the triples in the inductive test set
selected = list(data_ind_test)

#running totals over the triples evaluated so far
Hits_at_1 = 0
Hits_at_3 = 0
Hits_at_10 = 0
MRR_raw = 0. #sum of reciprocal filtered ranks (despite the name)

#a candidate found in any of these sets is an existing triple
known_triple_sets = (data_test, data_valid, data,
                     data_ind, data_ind_valid, data_ind_test)

for i in range(len(selected)):

    s_true, r_true, t_true = selected[i][0], selected[i][1], selected[i][2]

    #run the path-based scoring
    score_dict_path = path_based_relation_scoring(s_true, t_true, lower_bound, upper_bound_path, one_hop_ind, id2relation, model)

    #run the one-hop neighbour based scoring
    score_dict_subg = subgraph_relation_scoring(s_true, t_true, lower_bound, upper_bound_subg, one_hop_ind, id2relation, model_2)

    #final score dict: sum of the two scores per relation
    score_dict = defaultdict(float)

    for partial_scores in (score_dict_path, score_dict_subg):
        for rel in partial_scores:
            score_dict[rel] += partial_scores[rel]

    #[... [score, r], ...]
    #again, we only care about initial relation prediction; relations that
    #received no score get 0.0 (.get does not insert into the defaultdict)
    temp_list = [[score_dict.get(rel, 0.0), rel]
                 for rel in id2relation if rel % 2 == 0]

    sorted_list = sorted(temp_list, key = lambda x: x[0], reverse=True)

    p = 0          #position of the true relation in the ranking
    exist_tri = 0  #better-ranked relations that form an existing triple

    while p < len(sorted_list) and sorted_list[p][1] != r_true:

        #moreover, we want to remove existing triples
        candidate = (s_true, sorted_list[p][1], t_true)

        if any(candidate in triples for triples in known_triple_sets):

            exist_tri += 1

        p += 1

    #without this check a missing true relation would first be counted in
    #the metrics and then fail in the print below with an IndexError
    if p == len(sorted_list):

        raise ValueError('true relation {} of test triple {} is not among '
                         'the ranked candidate relations'.format(r_true, i))

    cur_rank = p - exist_tri #filtered rank, 0 is the best

    if cur_rank == 0:

        Hits_at_1 += 1

    if cur_rank < 3:

        Hits_at_3 += 1

    if cur_rank < 10:

        Hits_at_10 += 1

    MRR_raw += 1./float(cur_rank + 1.)

    print('checkcorrect', r_true, sorted_list[p][1],
          'real score', sorted_list[p][0],
          'Hits@1', Hits_at_1/(i+1),
          'Hits@3', Hits_at_3/(i+1),
          'Hits@10', Hits_at_10/(i+1),
          'MRR', MRR_raw/(i+1),
          'cur_rank', cur_rank,
          'abs_cur_rank', p,
          'total_num', i, len(selected))

9 82
9 27 51
checkcorrect 2 2 real score 1.7105379343032836 Hits@1 1.0 Hits@3 1.0 Hits@10 1.0 MRR 1.0 cur_rank 0 abs_cur_rank 0 total_num 0 1429
9 132
9 20 26
checkcorrect 2 2 real score 1.6504359662532808 Hits@1 1.0 Hits@3 1.0 Hits@10 1.0 MRR 1.0 cur_rank 0 abs_cur_rank 0 total_num 1 1429
9 3
9 25 17
checkcorrect 2 2 real score 1.4635172322392465 Hits@1 1.0 Hits@3 1.0 Hits@10 1.0 MRR 1.0 cur_rank 0 abs_cur_rank 0 total_num 2 1429
9 140
9 67 78
checkcorrect 2 2 real score 1.6883579045534134 Hits@1 1.0 Hits@3 1.0 Hits@10 1.0 MRR 1.0 cur_rank 0 abs_cur_rank 0 total_num 3 1429
0 1
9 11 32
checkcorrect 2 2 real score 0.7167330622673035 Hits@1 1.0 Hits@3 1.0 Hits@10 1.0 MRR 1.0 cur_rank 0 abs_cur_rank 0 total_num 4 1429
9 48
9 62 38
checkcorrect 2 2 real score 1.513030433654785 Hits@1 1.0 Hits@3 1.0 Hits@10 1.0 MRR 1.0 cur_rank 0 abs_cur_rank 0 total_num 5 1429
9 150
9 11 44
checkcorrect 4 4 real score 1.268584681302309 Hits@1 1.0 Hits@3 1.0 Hits@10 1.0 MRR 1.0 cur_rank 0 abs_cur_rank 0 tot

9 42
9 34 37
checkcorrect 2 2 real score 1.446180707961321 Hits@1 0.8076923076923077 Hits@3 1.0 Hits@10 1.0 MRR 0.8910256410256411 cur_rank 0 abs_cur_rank 0 total_num 51 1429
9 80
9 67 41
checkcorrect 2 2 real score 1.7618943929672242 Hits@1 0.8113207547169812 Hits@3 1.0 Hits@10 1.0 MRR 0.8930817610062893 cur_rank 0 abs_cur_rank 0 total_num 52 1429
9 25
9 38 34
checkcorrect 2 2 real score 1.6037633210420608 Hits@1 0.8148148148148148 Hits@3 1.0 Hits@10 1.0 MRR 0.8950617283950618 cur_rank 0 abs_cur_rank 0 total_num 53 1429
9 13
9 36 26
checkcorrect 2 2 real score 1.652468004822731 Hits@1 0.8181818181818182 Hits@3 1.0 Hits@10 1.0 MRR 0.896969696969697 cur_rank 0 abs_cur_rank 0 total_num 54 1429
0 1
9 45 69
checkcorrect 2 2 real score 0.5779496625065803 Hits@1 0.8214285714285714 Hits@3 1.0 Hits@10 1.0 MRR 0.8988095238095238 cur_rank 0 abs_cur_rank 0 total_num 55 1429
9 11
9 28 38
checkcorrect 2 2 real score 1.564708411693573 Hits@1 0.8245614035087719 Hits@3 1.0 Hits@10 1.0 MRR 0.9005847953

9 15
9 44 38
checkcorrect 2 2 real score 1.6520869851112365 Hits@1 0.82 Hits@3 1.0 Hits@10 1.0 MRR 0.9 cur_rank 0 abs_cur_rank 0 total_num 99 1429
9 109
9 71 50
checkcorrect 12 12 real score 0.9853025266900659 Hits@1 0.8217821782178217 Hits@3 1.0 Hits@10 1.0 MRR 0.900990099009901 cur_rank 0 abs_cur_rank 0 total_num 100 1429
0 0
9 9 10
checkcorrect 2 2 real score 0.6270277172327041 Hits@1 0.8137254901960784 Hits@3 1.0 Hits@10 1.0 MRR 0.8970588235294118 cur_rank 1 abs_cur_rank 1 total_num 101 1429
0 1
9 24 33
checkcorrect 2 2 real score 0.7370968997478485 Hits@1 0.8155339805825242 Hits@3 1.0 Hits@10 1.0 MRR 0.8980582524271845 cur_rank 0 abs_cur_rank 0 total_num 102 1429
9 128
9 34 33
checkcorrect 2 2 real score 1.6770299941301348 Hits@1 0.8173076923076923 Hits@3 1.0 Hits@10 1.0 MRR 0.8990384615384616 cur_rank 0 abs_cur_rank 0 total_num 103 1429
9 32
9 38 13
checkcorrect 4 4 real score 1.7619756221771241 Hits@1 0.819047619047619 Hits@3 1.0 Hits@10 1.0 MRR 0.9 cur_rank 0 abs_cur_rank 0 tot

9 7 24
checkcorrect 2 2 real score 0.5770882606506348 Hits@1 0.8108108108108109 Hits@3 1.0 Hits@10 1.0 MRR 0.8952702702702703 cur_rank 0 abs_cur_rank 0 total_num 147 1429
9 127
9 78 73
checkcorrect 2 2 real score 1.695519083738327 Hits@1 0.8120805369127517 Hits@3 1.0 Hits@10 1.0 MRR 0.8959731543624161 cur_rank 0 abs_cur_rank 0 total_num 148 1429
9 40
9 39 9
checkcorrect 4 4 real score 1.4162578895688056 Hits@1 0.8133333333333334 Hits@3 1.0 Hits@10 1.0 MRR 0.8966666666666666 cur_rank 0 abs_cur_rank 0 total_num 149 1429
9 46
9 33 17
checkcorrect 2 2 real score 1.3283486366271973 Hits@1 0.8145695364238411 Hits@3 1.0 Hits@10 1.0 MRR 0.8973509933774835 cur_rank 0 abs_cur_rank 0 total_num 150 1429
9 10
9 6 11
checkcorrect 12 12 real score 1.6526967167854307 Hits@1 0.8157894736842105 Hits@3 1.0 Hits@10 1.0 MRR 0.8980263157894737 cur_rank 0 abs_cur_rank 0 total_num 151 1429
9 3
9 21 17
checkcorrect 2 2 real score 1.7367217257618903 Hits@1 0.8169934640522876 Hits@3 1.0 Hits@10 1.0 MRR 0.8986928

9 3
9 11 19
checkcorrect 2 2 real score 1.7140884041786193 Hits@1 0.8256410256410256 Hits@3 1.0 Hits@10 1.0 MRR 0.9042735042735043 cur_rank 0 abs_cur_rank 0 total_num 194 1429
9 9
9 19 28
checkcorrect 2 2 real score 1.7007254481315615 Hits@1 0.826530612244898 Hits@3 1.0 Hits@10 1.0 MRR 0.9047619047619048 cur_rank 0 abs_cur_rank 0 total_num 195 1429
0 1
9 14 9
checkcorrect 2 2 real score 0.6149253867566585 Hits@1 0.8274111675126904 Hits@3 1.0 Hits@10 1.0 MRR 0.9052453468697124 cur_rank 0 abs_cur_rank 0 total_num 196 1429
9 3
9 28 29
checkcorrect 2 2 real score 1.616503268480301 Hits@1 0.8282828282828283 Hits@3 1.0 Hits@10 1.0 MRR 0.9057239057239058 cur_rank 0 abs_cur_rank 0 total_num 197 1429
0 0
9 20 27
checkcorrect 6 6 real score 0.05579934511333704 Hits@1 0.8241206030150754 Hits@3 1.0 Hits@10 1.0 MRR 0.9036850921273032 cur_rank 1 abs_cur_rank 1 total_num 198 1429
9 80
9 48 56
checkcorrect 2 2 real score 1.6802971482276918 Hits@1 0.825 Hits@3 1.0 Hits@10 1.0 MRR 0.9041666666666667 cur

0 0
9 26 4
checkcorrect 4 4 real score 0.12911519333720206 Hits@1 0.8083333333333333 Hits@3 0.9958333333333333 Hits@10 1.0 MRR 0.8950396825396826 cur_rank 1 abs_cur_rank 1 total_num 239 1429
0 0
9 7 17
checkcorrect 4 4 real score 0.6795670330524445 Hits@1 0.8091286307053942 Hits@3 0.995850622406639 Hits@10 1.0 MRR 0.8954752025291445 cur_rank 0 abs_cur_rank 0 total_num 240 1429
0 1
9 15 8
checkcorrect 2 2 real score 0.6745277762413024 Hits@1 0.8099173553719008 Hits@3 0.9958677685950413 Hits@10 1.0 MRR 0.8959071231798506 cur_rank 0 abs_cur_rank 0 total_num 241 1429
9 74
9 11 34
checkcorrect 2 2 real score 0.8555488228797912 Hits@1 0.8065843621399177 Hits@3 0.9958847736625515 Hits@10 1.0 MRR 0.8942778757593574 cur_rank 1 abs_cur_rank 1 total_num 242 1429
9 19
9 40 35
checkcorrect 2 2 real score 1.7790147423744203 Hits@1 0.8073770491803278 Hits@3 0.9959016393442623 Hits@10 1.0 MRR 0.8947111631537862 cur_rank 0 abs_cur_rank 0 total_num 243 1429
9 150
9 50 43
checkcorrect 2 2 real score 1.67

9 139
9 48 34
checkcorrect 2 2 real score 1.287501997500658 Hits@1 0.8133802816901409 Hits@3 0.9964788732394366 Hits@10 1.0 MRR 0.8989771965124078 cur_rank 0 abs_cur_rank 0 total_num 283 1429
0 1
9 17 30
checkcorrect 2 2 real score 0.6149459898471832 Hits@1 0.8105263157894737 Hits@3 0.9964912280701754 Hits@10 1.0 MRR 0.897577276524645 cur_rank 1 abs_cur_rank 1 total_num 284 1429
0 1
9 25 11
checkcorrect 2 2 real score 0.7595217287540436 Hits@1 0.8111888111888111 Hits@3 0.9965034965034965 Hits@10 1.0 MRR 0.8979353979353981 cur_rank 0 abs_cur_rank 0 total_num 285 1429
0 0
9 7 29
checkcorrect 4 4 real score 0.4225573420524597 Hits@1 0.8118466898954704 Hits@3 0.9965156794425087 Hits@10 1.0 MRR 0.898291023726564 cur_rank 0 abs_cur_rank 0 total_num 286 1429
9 8
9 9 15
checkcorrect 4 4 real score 1.243815329670906 Hits@1 0.8090277777777778 Hits@3 0.9965277777777778 Hits@10 1.0 MRR 0.896908068783069 cur_rank 1 abs_cur_rank 1 total_num 287 1429
9 128
9 20 49
checkcorrect 4 4 real score 1.074629

9 5 11
checkcorrect 4 4 real score 0.6062650859355927 Hits@1 0.8140243902439024 Hits@3 0.9939024390243902 Hits@10 1.0 MRR 0.8991906213704995 cur_rank 0 abs_cur_rank 0 total_num 327 1429
9 22
9 33 30
checkcorrect 2 2 real score 1.7356524825096131 Hits@1 0.8145896656534954 Hits@3 0.993920972644377 Hits@10 1.0 MRR 0.8994970328556956 cur_rank 0 abs_cur_rank 0 total_num 328 1429
0 0
9 15 24
checkcorrect 2 2 real score 0.5901044756174088 Hits@1 0.8121212121212121 Hits@3 0.9939393939393939 Hits@10 1.0 MRR 0.898286435786436 cur_rank 1 abs_cur_rank 1 total_num 329 1429
9 104
9 21 66
checkcorrect 4 4 real score 0.8491712868213653 Hits@1 0.8096676737160121 Hits@3 0.9939577039274925 Hits@10 1.0 MRR 0.8970831535030932 cur_rank 1 abs_cur_rank 1 total_num 330 1429
9 64
9 37 38
checkcorrect 2 2 real score 1.6598287612199782 Hits@1 0.8102409638554217 Hits@3 0.9939759036144579 Hits@10 1.0 MRR 0.8973931440045899 cur_rank 0 abs_cur_rank 0 total_num 331 1429
0 0
9 35 9
checkcorrect 4 4 real score 0.6883936

9 21 27
checkcorrect 4 4 real score 0.3668560579419136 Hits@1 0.8194070080862533 Hits@3 0.9919137466307277 Hits@10 1.0 MRR 0.9018900012835325 cur_rank 1 abs_cur_rank 1 total_num 370 1429
0 1
9 16 39
checkcorrect 2 2 real score 0.6074461147189141 Hits@1 0.8172043010752689 Hits@3 0.9919354838709677 Hits@10 1.0 MRR 0.9008096518177166 cur_rank 1 abs_cur_rank 1 total_num 371 1429
0 1
9 24 23
checkcorrect 2 2 real score 0.6779664874076843 Hits@1 0.8176943699731903 Hits@3 0.9919571045576407 Hits@10 1.0 MRR 0.9010755776841569 cur_rank 0 abs_cur_rank 0 total_num 372 1429
0 0
9 8 9
checkcorrect 2 2 real score 0.5555479049682617 Hits@1 0.8181818181818182 Hits@3 0.9919786096256684 Hits@10 1.0 MRR 0.9013400814871405 cur_rank 0 abs_cur_rank 0 total_num 373 1429
0 2
9 17 12
checkcorrect 2 2 real score 0.4604423552751541 Hits@1 0.8186666666666667 Hits@3 0.992 Hits@10 1.0 MRR 0.9016031746031747 cur_rank 0 abs_cur_rank 0 total_num 374 1429
0 1
9 48 55
checkcorrect 8 8 real score 0.7374479651451111 Hits@

9 22 12
checkcorrect 2 2 real score 0.7174268960952759 Hits@1 0.8265060240963855 Hits@3 0.9927710843373494 Hits@10 1.0 MRR 0.9062679288582904 cur_rank 0 abs_cur_rank 0 total_num 414 1429
9 30
9 48 10
checkcorrect 4 4 real score 1.5874899625778198 Hits@1 0.8269230769230769 Hits@3 0.9927884615384616 Hits@10 1.0 MRR 0.9064932463369965 cur_rank 0 abs_cur_rank 0 total_num 415 1429
9 6
9 17 12
checkcorrect 6 6 real score 0.6300248694140463 Hits@1 0.8249400479616307 Hits@3 0.9928057553956835 Hits@10 1.0 MRR 0.9055184423889462 cur_rank 1 abs_cur_rank 1 total_num 416 1429
0 1
9 13 10
checkcorrect 2 2 real score 0.5064633801579476 Hits@1 0.8229665071770335 Hits@3 0.992822966507177 Hits@10 1.0 MRR 0.9045483025746185 cur_rank 1 abs_cur_rank 1 total_num 417 1429
9 67
9 46 54
checkcorrect 2 2 real score 1.5293253913521767 Hits@1 0.8233890214797136 Hits@3 0.9928400954653938 Hits@10 1.0 MRR 0.9047761109216957 cur_rank 0 abs_cur_rank 0 total_num 418 1429
9 150
9 38 49
checkcorrect 4 4 real score 1.4721

9 7
9 29 21
checkcorrect 2 2 real score 1.509934848546982 Hits@1 0.8213507625272332 Hits@3 0.9912854030501089 Hits@10 1.0 MRR 0.902616972715012 cur_rank 0 abs_cur_rank 0 total_num 458 1429
0 1
9 16 32
checkcorrect 2 2 real score 0.8360435247421265 Hits@1 0.8217391304347826 Hits@3 0.991304347826087 Hits@10 1.0 MRR 0.9028286749482402 cur_rank 0 abs_cur_rank 0 total_num 459 1429
9 19
9 34 45
checkcorrect 2 2 real score 1.5903243094682693 Hits@1 0.8221258134490239 Hits@3 0.9913232104121475 Hits@10 1.0 MRR 0.9030394587336019 cur_rank 0 abs_cur_rank 0 total_num 460 1429
9 29
9 27 20
checkcorrect 2 2 real score 1.5993161663413047 Hits@1 0.8225108225108225 Hits@3 0.9913419913419913 Hits@10 1.0 MRR 0.9032493300350443 cur_rank 0 abs_cur_rank 0 total_num 461 1429
9 6
9 20 21
checkcorrect 8 8 real score 1.8900256156921387 Hits@1 0.8228941684665226 Hits@3 0.9913606911447084 Hits@10 1.0 MRR 0.9034582947649902 cur_rank 0 abs_cur_rank 0 total_num 462 1429
9 150
9 49 34
checkcorrect 2 2 real score 1.56

9 150
9 27 16
checkcorrect 4 4 real score 1.2992744833230971 Hits@1 0.8230616302186878 Hits@3 0.9920477137176938 Hits@10 1.0 MRR 0.9035146265265549 cur_rank 0 abs_cur_rank 0 total_num 502 1429
9 30
9 48 25
checkcorrect 2 2 real score 1.2682031832635403 Hits@1 0.8234126984126984 Hits@3 0.9920634920634921 Hits@10 1.0 MRR 0.9037060657596371 cur_rank 0 abs_cur_rank 0 total_num 503 1429
9 110
9 50 42
checkcorrect 2 2 real score 1.6142323076725007 Hits@1 0.8237623762376237 Hits@3 0.9920792079207921 Hits@10 1.0 MRR 0.9038967468175388 cur_rank 0 abs_cur_rank 0 total_num 504 1429
0 1
9 29 31
checkcorrect 2 2 real score 0.6114012420177459 Hits@1 0.8241106719367589 Hits@3 0.9920948616600791 Hits@10 1.0 MRR 0.9040866741953698 cur_rank 0 abs_cur_rank 0 total_num 505 1429
9 3
9 4 30
checkcorrect 2 2 real score 1.32906836271286 Hits@1 0.8244575936883629 Hits@3 0.9921104536489151 Hits@10 1.0 MRR 0.9042758523527753 cur_rank 0 abs_cur_rank 0 total_num 506 1429
9 118
9 54 35
checkcorrect 2 2 real score 1

9 150
9 29 31
checkcorrect 2 2 real score 1.3266843020915986 Hits@1 0.836996336996337 Hits@3 0.9926739926739927 Hits@10 1.0 MRR 0.9111132914704343 cur_rank 0 abs_cur_rank 0 total_num 545 1429
0 0
9 23 50
checkcorrect 4 4 real score 0.33324808329343797 Hits@1 0.8354661791590493 Hits@3 0.9926873857404022 Hits@10 1.0 MRR 0.910361713241055 cur_rank 1 abs_cur_rank 1 total_num 546 1429
9 4
9 34 34
checkcorrect 4 4 real score 0.31503572687506676 Hits@1 0.833941605839416 Hits@3 0.9927007299270073 Hits@10 1.0 MRR 0.9096128779979145 cur_rank 1 abs_cur_rank 1 total_num 547 1429
9 150
9 97 106
checkcorrect 2 2 real score 1.615291252732277 Hits@1 0.8342440801457195 Hits@3 0.9927140255009107 Hits@10 1.0 MRR 0.9097775175644027 cur_rank 0 abs_cur_rank 0 total_num 548 1429
0 0
9 7 23
checkcorrect 2 2 real score 0.4647306650876999 Hits@1 0.8327272727272728 Hits@3 0.9927272727272727 Hits@10 1.0 MRR 0.9090324675324675 cur_rank 1 abs_cur_rank 1 total_num 549 1429
9 3
9 34 22
checkcorrect 2 2 real score 1.7

9 107
9 30 24
checkcorrect 2 2 real score 1.7004158824682236 Hits@1 0.8305084745762712 Hits@3 0.9898305084745763 Hits@10 1.0 MRR 0.9070076674737692 cur_rank 0 abs_cur_rank 0 total_num 589 1429
0 1
9 9 28
checkcorrect 2 2 real score 0.7299530863761902 Hits@1 0.8307952622673435 Hits@3 0.9898477157360406 Hits@10 1.0 MRR 0.9071650149061317 cur_rank 0 abs_cur_rank 0 total_num 590 1429
9 149
9 32 40
checkcorrect 4 4 real score 0.8240464337170124 Hits@1 0.8293918918918919 Hits@3 0.9898648648648649 Hits@10 1.0 MRR 0.9064772361647361 cur_rank 1 abs_cur_rank 1 total_num 591 1429
0 0
9 26 21
checkcorrect 4 4 real score 0.6731469571590424 Hits@1 0.8296795952782462 Hits@3 0.9898819561551433 Hits@10 1.0 MRR 0.9066349474022324 cur_rank 0 abs_cur_rank 0 total_num 592 1429
0 0
9 19 20
checkcorrect 4 4 real score 0.27638804316520693 Hits@1 0.8282828282828283 Hits@3 0.98989898989899 Hits@10 1.0 MRR 0.90595037678371 cur_rank 1 abs_cur_rank 1 total_num 593 1429
9 80
9 40 20
checkcorrect 2 2 real score 1.66

9 89
9 34 35
checkcorrect 2 2 real score 1.6482810094952582 Hits@1 0.831230283911672 Hits@3 0.9889589905362776 Hits@10 1.0 MRR 0.9075465675229082 cur_rank 0 abs_cur_rank 0 total_num 633 1429
9 5
9 27 33
checkcorrect 2 2 real score 1.6472975462675095 Hits@1 0.831496062992126 Hits@3 0.988976377952756 Hits@10 1.0 MRR 0.907692163479565 cur_rank 0 abs_cur_rank 0 total_num 634 1429
0 0
9 4 34
checkcorrect 4 4 real score 0.17469455823302268 Hits@1 0.8301886792452831 Hits@3 0.9889937106918238 Hits@10 1.0 MRR 0.9067890835579515 cur_rank 2 abs_cur_rank 2 total_num 635 1429
9 5
9 50 43
checkcorrect 2 2 real score 1.7853467255830764 Hits@1 0.8304552590266876 Hits@3 0.989010989010989 Hits@10 1.0 MRR 0.9069354115272483 cur_rank 0 abs_cur_rank 0 total_num 636 1429
9 3
9 10 17
checkcorrect 2 2 real score 1.4094659995287657 Hits@1 0.8307210031347962 Hits@3 0.9890282131661442 Hits@10 1.0 MRR 0.9070812807881774 cur_rank 0 abs_cur_rank 0 total_num 637 1429
0 0
0 68 2
checkcorrect 8 8 real score 0.0 Hits@1

9 16 12
checkcorrect 2 2 real score 0.6204543352127075 Hits@1 0.827433628318584 Hits@3 0.9867256637168141 Hits@10 1.0 MRR 0.9046846467200451 cur_rank 0 abs_cur_rank 0 total_num 677 1429
0 1
9 11 24
checkcorrect 2 2 real score 0.6751437664031983 Hits@1 0.8276877761413843 Hits@3 0.9867452135493373 Hits@10 1.0 MRR 0.9048250227926223 cur_rank 0 abs_cur_rank 0 total_num 678 1429
9 150
9 32 22
checkcorrect 2 2 real score 1.7086905539035797 Hits@1 0.8279411764705882 Hits@3 0.986764705882353 Hits@10 1.0 MRR 0.9049649859943979 cur_rank 0 abs_cur_rank 0 total_num 679 1429
9 18
9 40 35
checkcorrect 2 2 real score 1.5949193440377711 Hits@1 0.8281938325991189 Hits@3 0.986784140969163 Hits@10 1.0 MRR 0.9051045381441859 cur_rank 0 abs_cur_rank 0 total_num 680 1429
9 122
9 62 34
checkcorrect 2 2 real score 1.411210760474205 Hits@1 0.8284457478005866 Hits@3 0.9868035190615836 Hits@10 1.0 MRR 0.9052436810501328 cur_rank 0 abs_cur_rank 0 total_num 681 1429
9 36
9 19 19
checkcorrect 4 4 real score 1.20249

9 108
9 82 31
checkcorrect 2 2 real score 1.7043734282255172 Hits@1 0.8310249307479224 Hits@3 0.9875346260387812 Hits@10 1.0 MRR 0.9063382139559428 cur_rank 0 abs_cur_rank 0 total_num 721 1429
0 1
9 26 13
checkcorrect 2 2 real score 0.6009637445211411 Hits@1 0.8312586445366529 Hits@3 0.9875518672199171 Hits@10 1.0 MRR 0.9064677599947313 cur_rank 0 abs_cur_rank 0 total_num 722 1429
9 62
9 24 54
checkcorrect 2 2 real score 1.6997408390045166 Hits@1 0.8314917127071824 Hits@3 0.9875690607734806 Hits@10 1.0 MRR 0.9065969481715341 cur_rank 0 abs_cur_rank 0 total_num 723 1429
9 13
9 51 19
checkcorrect 2 2 real score 1.771595114469528 Hits@1 0.8317241379310345 Hits@3 0.9875862068965517 Hits@10 1.0 MRR 0.9067257799671595 cur_rank 0 abs_cur_rank 0 total_num 724 1429
0 0
9 39 4
checkcorrect 4 4 real score 0.35961039662361144 Hits@1 0.8305785123966942 Hits@3 0.987603305785124 Hits@10 1.0 MRR 0.9061655516200974 cur_rank 1 abs_cur_rank 1 total_num 725 1429
9 110
9 42 34
checkcorrect 2 2 real score 1

9 17
9 22 34
checkcorrect 2 2 real score 1.7355232775211333 Hits@1 0.8379084967320262 Hits@3 0.9882352941176471 Hits@10 1.0 MRR 0.9102956738250859 cur_rank 0 abs_cur_rank 0 total_num 764 1429
9 124
9 75 51
checkcorrect 2 2 real score 1.768509942293167 Hits@1 0.8381201044386423 Hits@3 0.9882506527415144 Hits@10 1.0 MRR 0.9104127813005101 cur_rank 0 abs_cur_rank 0 total_num 765 1429
9 127
9 26 26
checkcorrect 2 2 real score 1.6891122937202454 Hits@1 0.8383311603650587 Hits@3 0.9882659713168188 Hits@10 1.0 MRR 0.9105295834109396 cur_rank 0 abs_cur_rank 0 total_num 766 1429
0 0
9 12 11
checkcorrect 4 4 real score 0.7565718650817871 Hits@1 0.8385416666666666 Hits@3 0.98828125 Hits@10 1.0 MRR 0.9106460813492067 cur_rank 0 abs_cur_rank 0 total_num 767 1429
9 9
9 28 32
checkcorrect 2 2 real score 1.619897073507309 Hits@1 0.8387516254876463 Hits@3 0.988296488946684 Hits@10 1.0 MRR 0.9107622763019385 cur_rank 0 abs_cur_rank 0 total_num 768 1429
0 1
9 32 11
checkcorrect 2 2 real score 0.738732185

9 21
9 23 25
checkcorrect 2 2 real score 1.6872417986392976 Hits@1 0.8393077873918418 Hits@3 0.9888751545117429 Hits@10 1.0 MRR 0.9108481958914596 cur_rank 0 abs_cur_rank 0 total_num 808 1429
9 150
9 39 56
checkcorrect 2 2 real score 1.647916752099991 Hits@1 0.8395061728395061 Hits@3 0.9888888888888889 Hits@10 1.0 MRR 0.9109582598471492 cur_rank 0 abs_cur_rank 0 total_num 809 1429
9 19
9 33 28
checkcorrect 6 6 real score 0.531971774995327 Hits@1 0.8384710234278668 Hits@3 0.9889025893958077 Hits@10 1.0 MRR 0.9102460219599559 cur_rank 2 abs_cur_rank 2 total_num 810 1429
9 6
9 18 22
checkcorrect 4 4 real score 1.5538138180971146 Hits@1 0.8386699507389163 Hits@3 0.9889162561576355 Hits@10 1.0 MRR 0.9103565564156701 cur_rank 0 abs_cur_rank 0 total_num 811 1429
9 14
9 32 27
checkcorrect 2 2 real score 1.6344577223062515 Hits@1 0.8388683886838868 Hits@3 0.988929889298893 Hits@10 1.0 MRR 0.9104668189539042 cur_rank 0 abs_cur_rank 0 total_num 812 1429
0 0
9 19 6
checkcorrect 6 6 real score 0.66

0 2
9 21 7
checkcorrect 4 4 real score 0.3183135390281677 Hits@1 0.8370457209847597 Hits@3 0.9882766705744431 Hits@10 1.0 MRR 0.9090967453804505 cur_rank 1 abs_cur_rank 1 total_num 852 1429
9 5
9 9 29
checkcorrect 4 4 real score 1.3590140491724014 Hits@1 0.8372365339578455 Hits@3 0.9882903981264637 Hits@10 1.0 MRR 0.9092031894725109 cur_rank 0 abs_cur_rank 0 total_num 853 1429
0 0
9 22 20
checkcorrect 4 4 real score 0.05655923932790756 Hits@1 0.8362573099415205 Hits@3 0.9871345029239766 Hits@10 1.0 MRR 0.9084321915900869 cur_rank 3 abs_cur_rank 3 total_num 854 1429
9 75
9 13 13
checkcorrect 2 2 real score 1.4445250302553176 Hits@1 0.8364485981308412 Hits@3 0.9871495327102804 Hits@10 1.0 MRR 0.9085391633288835 cur_rank 0 abs_cur_rank 0 total_num 855 1429
9 50
9 27 22
checkcorrect 2 2 real score 1.745333456993103 Hits@1 0.8366394399066511 Hits@3 0.9871645274212368 Hits@10 1.0 MRR 0.9086458854253492 cur_rank 0 abs_cur_rank 0 total_num 856 1429
9 5
9 11 11
checkcorrect 2 2 real score 1.491

9 5
9 46 37
checkcorrect 2 2 real score 1.3853464948013425 Hits@1 0.8363028953229399 Hits@3 0.987750556792873 Hits@10 1.0 MRR 0.9085480962986537 cur_rank 0 abs_cur_rank 0 total_num 897 1429
9 150
9 28 44
checkcorrect 4 4 real score 1.4809225976467133 Hits@1 0.8364849833147943 Hits@3 0.9877641824249166 Hits@10 1.0 MRR 0.9086498225541613 cur_rank 0 abs_cur_rank 0 total_num 898 1429
9 28
9 55 44
checkcorrect 2 2 real score 1.6536529421806336 Hits@1 0.8366666666666667 Hits@3 0.9877777777777778 Hits@10 1.0 MRR 0.9087513227513233 cur_rank 0 abs_cur_rank 0 total_num 899 1429
0 2
9 28 14
checkcorrect 2 2 real score 0.5827647117897868 Hits@1 0.8368479467258602 Hits@3 0.9877913429522752 Hits@10 1.0 MRR 0.9088525976428313 cur_rank 0 abs_cur_rank 0 total_num 900 1429
0 1
9 21 15
checkcorrect 2 2 real score 0.625055468082428 Hits@1 0.8370288248337029 Hits@3 0.9878048780487805 Hits@10 1.0 MRR 0.9089536479780388 cur_rank 0 abs_cur_rank 0 total_num 901 1429
9 33
9 38 55
checkcorrect 0 0 real score 1.9

9 37
9 51 31
checkcorrect 2 2 real score 0.8793541844468563 Hits@1 0.8374070138150903 Hits@3 0.9883103081827843 Hits@10 1.0 MRR 0.9091847578563844 cur_rank 1 abs_cur_rank 1 total_num 940 1429
9 62
9 13 14
checkcorrect 2 2 real score 1.5379805222153664 Hits@1 0.8375796178343949 Hits@3 0.9883227176220807 Hits@10 1.0 MRR 0.9092811646951781 cur_rank 0 abs_cur_rank 0 total_num 941 1429
9 102
9 54 43
checkcorrect 2 2 real score 1.2194223137572409 Hits@1 0.8377518557794273 Hits@3 0.9883351007423118 Hits@10 1.0 MRR 0.9093773670655968 cur_rank 0 abs_cur_rank 0 total_num 942 1429
9 81
9 22 31
checkcorrect 4 4 real score 1.2496580969542266 Hits@1 0.8379237288135594 Hits@3 0.9883474576271186 Hits@10 1.0 MRR 0.9094733656174341 cur_rank 0 abs_cur_rank 0 total_num 943 1429
0 1
9 21 13
checkcorrect 2 2 real score 0.6839232355356216 Hits@1 0.8380952380952381 Hits@3 0.9883597883597883 Hits@10 1.0 MRR 0.9095691609977331 cur_rank 0 abs_cur_rank 0 total_num 944 1429
0 0
9 10 5
checkcorrect 2 2 real score 0

9 39
9 33 25
checkcorrect 2 2 real score 1.4670280814170837 Hits@1 0.8365482233502538 Hits@3 0.9888324873096447 Hits@10 1.0 MRR 0.9088421561518015 cur_rank 0 abs_cur_rank 0 total_num 984 1429
0 1
9 9 23
checkcorrect 8 8 real score 0.5801510095596314 Hits@1 0.8367139959432048 Hits@3 0.9888438133874239 Hits@10 1.0 MRR 0.9089346083260897 cur_rank 0 abs_cur_rank 0 total_num 985 1429
9 107
9 49 46
checkcorrect 2 2 real score 1.478215529024601 Hits@1 0.8368794326241135 Hits@3 0.9888551165146909 Hits@10 1.0 MRR 0.9090268731606125 cur_rank 0 abs_cur_rank 0 total_num 986 1429
9 24
9 46 41
checkcorrect 4 4 real score 1.31459898352623 Hits@1 0.8370445344129555 Hits@3 0.9888663967611336 Hits@10 1.0 MRR 0.909118951224215 cur_rank 0 abs_cur_rank 0 total_num 987 1429
9 51
9 20 47
checkcorrect 2 2 real score 1.3188711822032928 Hits@1 0.8372093023255814 Hits@3 0.9888776541961577 Hits@10 1.0 MRR 0.9092108430834424 cur_rank 0 abs_cur_rank 0 total_num 988 1429
0 1
9 8 23
checkcorrect 2 2 real score 0.7153

9 9
9 29 50
checkcorrect 4 4 real score 0.7684495586901903 Hits@1 0.8357628765792031 Hits@3 0.9883381924198251 Hits@10 1.0 MRR 0.9079889552192767 cur_rank 1 abs_cur_rank 1 total_num 1028 1429
9 123
9 12 38
checkcorrect 2 2 real score 1.7450945556163788 Hits@1 0.8359223300970874 Hits@3 0.9883495145631068 Hits@10 1.0 MRR 0.9080782863307143 cur_rank 0 abs_cur_rank 0 total_num 1029 1429
0 1
9 16 8
checkcorrect 2 2 real score 0.7863614648580551 Hits@1 0.8360814742967992 Hits@3 0.988360814742968 Hits@10 1.0 MRR 0.908167444151926 cur_rank 0 abs_cur_rank 0 total_num 1030 1429
9 90
9 42 24
checkcorrect 2 2 real score 1.6621249318122864 Hits@1 0.8362403100775194 Hits@3 0.9883720930232558 Hits@10 1.0 MRR 0.9082564291866625 cur_rank 0 abs_cur_rank 0 total_num 1031 1429
0 1
9 21 24
checkcorrect 2 2 real score 0.6178714856505394 Hits@1 0.8363988383349468 Hits@3 0.9883833494675702 Hits@10 1.0 MRR 0.9083452419367238 cur_rank 0 abs_cur_rank 0 total_num 1032 1429
9 150
9 28 40
checkcorrect 2 2 real scor

0 1
9 51 33
checkcorrect 2 2 real score 0.724731320142746 Hits@1 0.8367537313432836 Hits@3 0.9878731343283582 Hits@10 1.0 MRR 0.908492507699598 cur_rank 0 abs_cur_rank 0 total_num 1071 1429
9 4
9 18 24
checkcorrect 4 4 real score 1.1802813738584517 Hits@1 0.83690587138863 Hits@3 0.9878844361602982 Hits@10 1.0 MRR 0.9085777896122731 cur_rank 0 abs_cur_rank 0 total_num 1072 1429
9 9
9 9 16
checkcorrect 2 2 real score 1.7656392991542815 Hits@1 0.8370577281191807 Hits@3 0.9878957169459963 Hits@10 1.0 MRR 0.9086629127131928 cur_rank 0 abs_cur_rank 0 total_num 1073 1429
0 1
9 38 31
checkcorrect 2 2 real score 0.7105143159627915 Hits@1 0.8372093023255814 Hits@3 0.987906976744186 Hits@10 1.0 MRR 0.9087478774455526 cur_rank 0 abs_cur_rank 0 total_num 1074 1429
0 0
9 26 15
checkcorrect 6 6 real score 0.18475473672151566 Hits@1 0.8364312267657993 Hits@3 0.9879182156133829 Hits@10 1.0 MRR 0.9083680002360307 cur_rank 1 abs_cur_rank 1 total_num 1075 1429
9 94
9 46 38
checkcorrect 2 2 real score 1.63

9 21 5
checkcorrect 4 4 real score 0.5218293458223343 Hits@1 0.8322869955156951 Hits@3 0.9874439461883409 Hits@10 1.0 MRR 0.9062666381948901 cur_rank 0 abs_cur_rank 0 total_num 1114 1429
0 1
9 19 42
checkcorrect 2 2 real score 0.6520474642515183 Hits@1 0.8324372759856631 Hits@3 0.9874551971326165 Hits@10 1.0 MRR 0.9063506286624574 cur_rank 0 abs_cur_rank 0 total_num 1115 1429
0 1
9 12 24
checkcorrect 2 2 real score 0.664915531873703 Hits@1 0.8325872873769025 Hits@3 0.9874664279319606 Hits@10 1.0 MRR 0.9064344687442278 cur_rank 0 abs_cur_rank 0 total_num 1116 1429
9 9
9 26 36
checkcorrect 2 2 real score 1.6147291958332062 Hits@1 0.832737030411449 Hits@3 0.9874776386404294 Hits@10 1.0 MRR 0.906518158843741 cur_rank 0 abs_cur_rank 0 total_num 1117 1429
9 68
9 28 37
checkcorrect 12 12 real score 1.385830594599247 Hits@1 0.8328865058087578 Hits@3 0.9874888293118856 Hits@10 1.0 MRR 0.9066016993630942 cur_rank 0 abs_cur_rank 0 total_num 1118 1429
9 58
9 34 36
checkcorrect 0 0 real score 1.517

9 150
9 58 41
checkcorrect 12 12 real score 0.909848871268332 Hits@1 0.8307426597582038 Hits@3 0.9853195164075993 Hits@10 1.0 MRR 0.9048057680182039 cur_rank 1 abs_cur_rank 1 total_num 1157 1429
9 8
9 30 11
checkcorrect 4 4 real score 1.6146752536296844 Hits@1 0.8308886971527178 Hits@3 0.9853321829163072 Hits@10 1.0 MRR 0.9048879028171528 cur_rank 0 abs_cur_rank 0 total_num 1158 1429
0 0
9 31 6
checkcorrect 2 2 real score 0.23510067258030176 Hits@1 0.8310344827586207 Hits@3 0.9853448275862069 Hits@10 1.0 MRR 0.9049698960043794 cur_rank 0 abs_cur_rank 0 total_num 1159 1429
0 1
9 27 12
checkcorrect 0 0 real score 0.9267962217330933 Hits@1 0.8311800172265289 Hits@3 0.9853574504737296 Hits@10 1.0 MRR 0.9050517479458053 cur_rank 0 abs_cur_rank 0 total_num 1160 1429
9 33
9 22 33
checkcorrect 2 2 real score 1.7165506660938263 Hits@1 0.8313253012048193 Hits@3 0.9853700516351118 Hits@10 1.0 MRR 0.905133459006093 cur_rank 0 abs_cur_rank 0 total_num 1161 1429
9 146
9 25 56
checkcorrect 4 4 real s

9 150
9 37 99
checkcorrect 4 4 real score 1.2817524068057538 Hits@1 0.8301415487094088 Hits@3 0.9858451290591174 Hits@10 1.0 MRR 0.9048834965571024 cur_rank 0 abs_cur_rank 0 total_num 1200 1429
9 64
9 20 21
checkcorrect 2 2 real score 1.221498852968216 Hits@1 0.8302828618968386 Hits@3 0.9858569051580699 Hits@10 1.0 MRR 0.9049626284235275 cur_rank 0 abs_cur_rank 0 total_num 1201 1429
9 65
9 28 49
checkcorrect 2 2 real score 1.5928073078393936 Hits@1 0.830423940149626 Hits@3 0.9858686616791354 Hits@10 1.0 MRR 0.9050416287324023 cur_rank 0 abs_cur_rank 0 total_num 1202 1429
0 1
9 6 14
checkcorrect 4 4 real score 0.35962916910648346 Hits@1 0.829734219269103 Hits@3 0.9858803986710963 Hits@10 1.0 MRR 0.9047052154195017 cur_rank 1 abs_cur_rank 1 total_num 1203 1429
9 80
9 46 30
checkcorrect 2 2 real score 1.5574820592999459 Hits@1 0.8298755186721992 Hits@3 0.9858921161825727 Hits@10 1.0 MRR 0.9047842982282822 cur_rank 0 abs_cur_rank 0 total_num 1204 1429
0 0
9 9 21
checkcorrect 2 2 real score

9 11 5
checkcorrect 4 4 real score 0.31053894609212873 Hits@1 0.8287781350482315 Hits@3 0.9855305466237942 Hits@10 1.0 MRR 0.9039108355024758 cur_rank 0 abs_cur_rank 0 total_num 1243 1429
0 1
9 13 29
checkcorrect 2 2 real score 0.6579370856285095 Hits@1 0.8289156626506025 Hits@3 0.9855421686746988 Hits@10 1.0 MRR 0.903988015554281 cur_rank 0 abs_cur_rank 0 total_num 1244 1429
9 8
9 38 14
checkcorrect 4 4 real score 1.5669877529144287 Hits@1 0.8290529695024077 Hits@3 0.985553772070626 Hits@10 1.0 MRR 0.9040650717215729 cur_rank 0 abs_cur_rank 0 total_num 1245 1429
9 86
9 55 14
checkcorrect 2 2 real score 1.6540729284286497 Hits@1 0.8291900561347233 Hits@3 0.9855653568564555 Hits@10 1.0 MRR 0.9041420043023897 cur_rank 0 abs_cur_rank 0 total_num 1246 1429
0 1
9 9 22
checkcorrect 2 2 real score 0.7127049088478088 Hits@1 0.8293269230769231 Hits@3 0.9855769230769231 Hits@10 1.0 MRR 0.904218813593814 cur_rank 0 abs_cur_rank 0 total_num 1247 1429
0 0
9 28 21
checkcorrect 4 4 real score 0.53843

9 10 27
checkcorrect 2 2 real score 0.5283653765916825 Hits@1 0.8275058275058275 Hits@3 0.9852369852369852 Hits@10 1.0 MRR 0.903301019967687 cur_rank 0 abs_cur_rank 0 total_num 1286 1429
0 0
9 22 34
checkcorrect 4 4 real score 0.4582273781299591 Hits@1 0.8268633540372671 Hits@3 0.985248447204969 Hits@10 1.0 MRR 0.9029878980577741 cur_rank 1 abs_cur_rank 1 total_num 1287 1429
0 1
9 14 7
checkcorrect 2 2 real score 0.6899787485599518 Hits@1 0.8269976726144298 Hits@3 0.9852598913886734 Hits@10 1.0 MRR 0.9030631595798395 cur_rank 0 abs_cur_rank 0 total_num 1288 1429
9 15
9 37 27
checkcorrect 2 2 real score 1.6995275110006332 Hits@1 0.8271317829457364 Hits@3 0.9852713178294573 Hits@10 1.0 MRR 0.9031383044173745 cur_rank 0 abs_cur_rank 0 total_num 1289 1429
0 1
9 53 20
checkcorrect 2 2 real score 0.5228402700275183 Hits@1 0.8272656855151046 Hits@3 0.9852827265685515 Hits@10 1.0 MRR 0.9032133328415284 cur_rank 0 abs_cur_rank 0 total_num 1290 1429
9 84
9 31 25
checkcorrect 6 6 real score 0.883

0 1
9 27 23
checkcorrect 2 2 real score 0.6193193018436431 Hits@1 0.8263157894736842 Hits@3 0.9857142857142858 Hits@10 1.0 MRR 0.9025426661892828 cur_rank 0 abs_cur_rank 0 total_num 1329 1429
0 0
9 17 14
checkcorrect 12 12 real score 0.6514402568340302 Hits@1 0.8264462809917356 Hits@3 0.98572501878287 Hits@10 1.0 MRR 0.9026158873266312 cur_rank 0 abs_cur_rank 0 total_num 1330 1429
0 1
9 22 27
checkcorrect 2 2 real score 0.6130239427089691 Hits@1 0.8258258258258259 Hits@3 0.9857357357357357 Hits@10 1.0 MRR 0.9023136231469566 cur_rank 1 abs_cur_rank 1 total_num 1331 1429
0 0
0 1 1
checkcorrect 4 4 real score 0.0 Hits@1 0.8252063015753939 Hits@3 0.9857464366091523 Hits@10 1.0 MRR 0.9018867812191144 cur_rank 2 abs_cur_rank 2 total_num 1332 1429
9 116
9 30 39
checkcorrect 4 4 real score 1.2457169443368912 Hits@1 0.8253373313343328 Hits@3 0.9857571214392804 Hits@10 1.0 MRR 0.90196032935913 cur_rank 0 abs_cur_rank 0 total_num 1333 1429
9 16
9 22 19
checkcorrect 4 4 real score 1.31413736939430

9 9 17
checkcorrect 4 4 real score 0.5546155512332916 Hits@1 0.8244719592134013 Hits@3 0.9847050254916242 Hits@10 1.0 MRR 0.9011641753083851 cur_rank 0 abs_cur_rank 0 total_num 1372 1429
9 110
9 22 34
checkcorrect 2 2 real score 1.724886918067932 Hits@1 0.8245997088791849 Hits@3 0.9847161572052402 Hits@10 1.0 MRR 0.9012361082230078 cur_rank 0 abs_cur_rank 0 total_num 1373 1429
0 1
9 6 7
checkcorrect 2 2 real score 0.6253774434328079 Hits@1 0.8247272727272728 Hits@3 0.9847272727272727 Hits@10 1.0 MRR 0.9013079365079365 cur_rank 0 abs_cur_rank 0 total_num 1374 1429
0 1
9 7 10
checkcorrect 2 2 real score 0.739851713180542 Hits@1 0.8248546511627907 Hits@3 0.9847383720930233 Hits@10 1.0 MRR 0.9013796603912883 cur_rank 0 abs_cur_rank 0 total_num 1375 1429
9 72
9 13 25
checkcorrect 4 4 real score 0.7654629476368427 Hits@1 0.8242556281771968 Hits@3 0.9847494553376906 Hits@10 1.0 MRR 0.9010881718942721 cur_rank 1 abs_cur_rank 1 total_num 1376 1429
0 1
9 11 36
checkcorrect 2 2 real score 0.77321

9 16 29
checkcorrect 2 2 real score 0.6077509105205536 Hits@1 0.8248587570621468 Hits@3 0.9844632768361582 Hits@10 1.0 MRR 0.9013995381580127 cur_rank 0 abs_cur_rank 0 total_num 1415 1429
9 26
9 57 22
checkcorrect 6 6 real score 0.7021024256013334 Hits@1 0.8242766407904023 Hits@3 0.9844742413549753 Hits@10 1.0 MRR 0.9011162639603005 cur_rank 1 abs_cur_rank 1 total_num 1416 1429
9 76
9 49 25
checkcorrect 2 2 real score 1.7001095950603484 Hits@1 0.8244005641748943 Hits@3 0.9844851904090268 Hits@10 1.0 MRR 0.9011859986119506 cur_rank 0 abs_cur_rank 0 total_num 1417 1429
0 1
9 42 3
checkcorrect 4 4 real score 0.5891951829195022 Hits@1 0.8245243128964059 Hits@3 0.9844961240310077 Hits@10 1.0 MRR 0.9012556349765651 cur_rank 0 abs_cur_rank 0 total_num 1418 1429
0 0
9 24 31
checkcorrect 4 4 real score 0.2893545016646385 Hits@1 0.823943661971831 Hits@3 0.9845070422535211 Hits@10 1.0 MRR 0.9009730605857366 cur_rank 1 abs_cur_rank 1 total_num 1419 1429
0 0
9 11 22
checkcorrect 4 4 real score 0.46

In [32]:
###########################################
##obtain the AUC-PR for the test triples###
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import average_precision_score, precision_recall_curve
from sklearn.metrics import auc, plot_precision_recall_curve
import matplotlib.pyplot as plt

# Evaluate the subgraph scorer as a binary classifier on the inductive test
# triples: every positive triple is paired with one corrupted (negative)
# triple, all triples are scored, and the area under the precision-recall
# curve is reported (running value every 20 triples, then the final value).


def _corrupt_triple(s, r, t, candidates, known_triple_sets):
    """Return a negative triple built by replacing the head or tail of (s, r, t).

    A fair coin decides whether the head or the tail is replaced. Replacement
    entities are drawn uniformly from ``candidates`` until the resulting
    triple is not contained in any collection of ``known_triple_sets``.
    """
    replace_head = random.uniform(0, 1) < 0.5

    while True:
        entity = random.choice(candidates)
        candidate = (entity, r, t) if replace_head else (s, r, entity)

        # filter out the existing triples
        if not any(candidate in triples for triples in known_triple_sets):
            return candidate


# we select all the triples in the inductive test set
pos_triples = list(data_ind_test)

# Materialise the candidate pool once; converting the set to a list on every
# draw is loop-invariant work.
candidate_entities = list(new_ent_set)

# Every split in which a corrupted triple must not already appear.
known_triple_sets = (data_test, data_valid, data,
                     data_ind, data_ind_valid, data_ind_test)

# we build the negative samples by randomly replacing the head or tail entity
# of each positive triple
neg_triples = [
    _corrupt_triple(triple[0], triple[1], triple[2],
                    candidate_entities, known_triple_sets)
    for triple in pos_triples
]

if len(pos_triples) != len(neg_triples):
    raise ValueError('error when generating negative triples')

# combine all triples
all_triples = pos_triples + neg_triples

# obtain the label array: 1 for positive triples, 0 for negative triples
y_test = np.concatenate((np.ones((len(pos_triples),)),
                         np.zeros((len(neg_triples),))))

# shuffle positive and negative triples (optional)
all_triples, y_test = shuffle(all_triples, y_test)

# obtain the score array
y_score = np.zeros((len(y_test),))

# implement the scoring (only the subgraph-based scorer is used here)
for i, triple in enumerate(all_triples):

    s, r, t = triple[0], triple[1], triple[2]

    y_score[i] = subgraph_triple_scoring(s, r, t, lower_bound, upper_bound_subg,
                                         one_hop_ind, id2relation, model_2)

    if i % 20 == 0 and i > 0:
        print('evaluating scores', i, len(all_triples))

        # Running estimate over the first i triples (the triple scored in
        # this iteration, at index i, is not part of the slice).
        precision, recall, thresholds = precision_recall_curve(y_test[:i], y_score[:i])
        # area under the precision-recall curve
        auc_precision_recall = auc(recall, precision)
        print('AUC-PR is:', auc_precision_recall)


# Final precision-recall curve over all triples
precision, recall, thresholds = precision_recall_curve(y_test, y_score)
# area under the precision-recall curve
auc_precision_recall = auc(recall, precision)
print('AUC-PR is:', auc_precision_recall)

evaluating scores 20 2858
AUC-PR is: 0.6216331789125906
evaluating scores 40 2858
AUC-PR is: 0.7616961827804294
evaluating scores 60 2858
AUC-PR is: 0.7401220289707974
evaluating scores 80 2858
AUC-PR is: 0.7535261091294837
evaluating scores 100 2858
AUC-PR is: 0.7608552270081776
evaluating scores 120 2858
AUC-PR is: 0.7607503556214654
evaluating scores 140 2858
AUC-PR is: 0.7799555582077222
evaluating scores 160 2858
AUC-PR is: 0.7747786548366198
evaluating scores 180 2858
AUC-PR is: 0.7849838527038903
evaluating scores 200 2858
AUC-PR is: 0.753651240516782
evaluating scores 220 2858
AUC-PR is: 0.7564995239384278
evaluating scores 240 2858
AUC-PR is: 0.7485437788549277
evaluating scores 260 2858
AUC-PR is: 0.7501146433823228
evaluating scores 280 2858
AUC-PR is: 0.7349880426463673
evaluating scores 300 2858
AUC-PR is: 0.7554593136175546
evaluating scores 320 2858
AUC-PR is: 0.7367091438244133
evaluating scores 340 2858
AUC-PR is: 0.7327606077333607
evaluating scores 360 2858
AUC-PR is

In [33]:
##########################################################
##obtain the AUC-PR for the test triples, using sklearn###
from sklearn import datasets, metrics
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import average_precision_score, precision_recall_curve
from sklearn.metrics import auc, plot_precision_recall_curve
import matplotlib.pyplot as plt

# Evaluate the subgraph scorer as a binary classifier on the inductive test
# triples: every positive triple is paired with one corrupted (negative)
# triple, all triples are scored, and ROC-AUC plus average precision are
# reported (running values every 20 triples, then the final values).


def _corrupt_triple(s, r, t, candidates, known_triple_sets):
    """Return a negative triple built by replacing the head or tail of (s, r, t).

    A fair coin decides whether the head or the tail is replaced. Replacement
    entities are drawn uniformly from ``candidates`` until the resulting
    triple is not contained in any collection of ``known_triple_sets``.
    """
    replace_head = random.uniform(0, 1) < 0.5

    while True:
        entity = random.choice(candidates)
        candidate = (entity, r, t) if replace_head else (s, r, entity)

        # filter out the existing triples
        if not any(candidate in triples for triples in known_triple_sets):
            return candidate


# we select all the triples in the inductive test set
pos_triples = list(data_ind_test)

# Materialise the candidate pool once; converting the set to a list on every
# draw is loop-invariant work.
candidate_entities = list(new_ent_set)

# Every split in which a corrupted triple must not already appear.
known_triple_sets = (data_test, data_valid, data,
                     data_ind, data_ind_valid, data_ind_test)

# we build the negative samples by randomly replacing the head or tail entity
# of each positive triple
neg_triples = [
    _corrupt_triple(triple[0], triple[1], triple[2],
                    candidate_entities, known_triple_sets)
    for triple in pos_triples
]

if len(pos_triples) != len(neg_triples):
    raise ValueError('error when generating negative triples')

# combine all triples
all_triples = pos_triples + neg_triples

# obtain the label array: 1 for positive triples, 0 for negative triples
y_test = np.concatenate((np.ones((len(pos_triples),)),
                         np.zeros((len(neg_triples),))))

# shuffle positive and negative triples (optional)
all_triples, y_test = shuffle(all_triples, y_test)

# obtain the score array
y_score = np.zeros((len(y_test),))

# implement the scoring (only the subgraph-based scorer is used here)
for i, triple in enumerate(all_triples):

    s, r, t = triple[0], triple[1], triple[2]

    y_score[i] = subgraph_triple_scoring(s, r, t, lower_bound, upper_bound_subg,
                                         one_hop_ind, id2relation, model_2)

    if i % 20 == 0 and i > 0:
        print('evaluating scores', i, len(all_triples))
        # Running estimate over the first i triples (the triple scored in
        # this iteration, at index i, is not part of the slice).
        # The result is named roc_auc so that it does not rebind the `auc`
        # function imported from sklearn.metrics at the top of this cell.
        roc_auc = metrics.roc_auc_score(y_test[:i], y_score[:i])
        auc_pr = metrics.average_precision_score(y_test[:i], y_score[:i])
        print('auc, auc-pr', roc_auc, auc_pr)

# Final metrics over all triples
print('evaluating scores', i, len(all_triples))
roc_auc = metrics.roc_auc_score(y_test, y_score)
auc_pr = metrics.average_precision_score(y_test, y_score)
print('(final) auc, auc-pr', roc_auc, auc_pr)

evaluating scores 20 2858
auc, auc-pr 0.8273809523809523 0.9223897134611418
evaluating scores 40 2858
auc, auc-pr 0.7129120879120879 0.8261535330653095
evaluating scores 60 2858
auc, auc-pr 0.6827262044653349 0.7666396582155786
evaluating scores 80 2858
auc, auc-pr 0.6919025674786045 0.771587898576326
evaluating scores 100 2858
auc, auc-pr 0.6293103448275862 0.7008871520605896
evaluating scores 120 2858
auc, auc-pr 0.6230203619909502 0.6972042345057767
evaluating scores 140 2858
auc, auc-pr 0.6346516007532956 0.715588033051898
evaluating scores 160 2858
auc, auc-pr 0.6591232789998417 0.7087285574477529
evaluating scores 180 2858
auc, auc-pr 0.6514718668488386 0.6935327749467828
evaluating scores 200 2858
auc, auc-pr 0.6650660264105642 0.686640910821871
evaluating scores 220 2858
auc, auc-pr 0.6759351208209201 0.701527797912207
evaluating scores 240 2858
auc, auc-pr 0.6711624026696329 0.6986142163498967
evaluating scores 260 2858
auc, auc-pr 0.6738499792789059 0.6936482332482707
evaluat

evaluating scores 2160 2858
auc, auc-pr 0.719288437843775 0.7170113227028303
evaluating scores 2180 2858
auc, auc-pr 0.7192394781144781 0.7172964696308375
evaluating scores 2200 2858
auc, auc-pr 0.7197393293048314 0.7183175457222959
evaluating scores 2220 2858
auc, auc-pr 0.7195028995765905 0.7191755824482717
evaluating scores 2240 2858
auc, auc-pr 0.7194029041527534 0.7193302150791707
evaluating scores 2260 2858
auc, auc-pr 0.7195094442477654 0.7188444384448561
evaluating scores 2280 2858
auc, auc-pr 0.7196926614435478 0.7186189205875941
evaluating scores 2300 2858
auc, auc-pr 0.7184649786483299 0.7176700660358429
evaluating scores 2320 2858
auc, auc-pr 0.7187434738631238 0.715841129068929
evaluating scores 2340 2858
auc, auc-pr 0.7184554714684702 0.7161894233814056
evaluating scores 2360 2858
auc, auc-pr 0.7199523650822839 0.7185213176121503
evaluating scores 2380 2858
auc, auc-pr 0.7172910033927609 0.715787763838704
evaluating scores 2400 2858
auc, auc-pr 0.7177772576235775 0.717019

In [34]:
######################################################
#obtain the Hits@N for entity prediction##############

# Entity-prediction ranking evaluation: each inductive test triple is ranked
# against NUM_NEGATIVES corrupted triples by the subgraph score, and running
# Hits@1 / Hits@3 / Hits@10 / MRR are printed after every test triple.


def _corrupt_triple(s, r, t, candidates, known_triple_sets):
    """Return a negative triple built by replacing the head or tail of (s, r, t).

    A fair coin decides whether the head or the tail is replaced. Replacement
    entities are drawn uniformly from ``candidates`` until the resulting
    triple is not contained in any collection of ``known_triple_sets``.
    """
    replace_head = random.uniform(0, 1) < 0.5

    while True:
        entity = random.choice(candidates)
        candidate = (entity, r, t) if replace_head else (s, r, entity)

        # filter out the existing triples
        if not any(candidate in triples for triples in known_triple_sets):
            return candidate


# we select all the triples in the inductive test set
selected = list(data_ind_test)

# Materialise the candidate pool once; converting the set to a list on every
# draw is loop-invariant work.
candidate_entities = list(new_ent_set)

# Every split in which a corrupted triple must not already appear.
known_triple_sets = (data_test, data_valid, data,
                     data_ind, data_ind_valid, data_ind_test)

# number of corrupted triples each true triple is ranked against
NUM_NEGATIVES = 50

# Running counters. Negatives are generated by replacing the head or the tail
# entity of the true triple (the relation is kept unchanged).
Hits_at_1 = 0
Hits_at_3 = 0
Hits_at_10 = 0
MRR_raw = 0.

for i, triple in enumerate(selected):

    # score the true triple
    s_pos, r_pos, t_pos = triple[0], triple[1], triple[2]
    true_triple = (s_pos, r_pos, t_pos)

    subg_score = subgraph_triple_scoring(s_pos, r_pos, t_pos, lower_bound, upper_bound_subg,
                                         one_hop_ind, id2relation, model_2)

    triple_list = [[true_triple, subg_score]]

    # generate and score the random negative samples
    for _ in range(NUM_NEGATIVES):

        neg_triple = _corrupt_triple(s_pos, r_pos, t_pos,
                                     candidate_entities, known_triple_sets)

        subg_score = subgraph_triple_scoring(neg_triple[0], neg_triple[1], neg_triple[2],
                                             lower_bound, upper_bound_subg,
                                             one_hop_ind, id2relation, model_2)

        triple_list.append([neg_triple, subg_score])

    # Shuffle before the (stable) sort so that triples with equal scores end
    # up in random relative order instead of insertion order.
    random.shuffle(triple_list)

    # sort by score, best first
    sorted_list = sorted(triple_list, key=lambda x: x[-1], reverse=True)

    # p is the 0-based rank of the true triple in the sorted list
    p = 0

    while p < len(sorted_list) and sorted_list[p][0] != true_triple:

        p += 1

    if p == 0:

        Hits_at_1 += 1

    if p < 3:

        Hits_at_3 += 1

    if p < 10:

        Hits_at_10 += 1

    # reciprocal of the 1-based rank
    MRR_raw += 1.0 / (p + 1)

    print('checkcorrect', true_triple, sorted_list[p][0],
          'real score', sorted_list[p][-1],
          'Hits@1', Hits_at_1/(i+1),
          'Hits@3', Hits_at_3/(i+1),
          'Hits@10', Hits_at_10/(i+1),
          'MRR', MRR_raw/(i+1),
          'rank', p,
          'total_num', i, len(selected))

checkcorrect (9272, 2, 3997) (9272, 2, 3997) real score 0.6795412719249725 Hits@1 0.0 Hits@3 0.0 Hits@10 0.0 MRR 0.08333333333333333 rank 11 total_num 0 1429
checkcorrect (6007, 2, 5903) (6007, 2, 5903) real score 0.7114194571971894 Hits@1 0.0 Hits@3 0.0 Hits@10 0.5 MRR 0.125 rank 5 total_num 1 1429
checkcorrect (10413, 2, 7851) (10413, 2, 7851) real score 0.6482020348310471 Hits@1 0.0 Hits@3 0.0 Hits@10 0.6666666666666666 MRR 0.13888888888888887 rank 5 total_num 2 1429
checkcorrect (4566, 2, 7229) (4566, 2, 7229) real score 0.7900169491767883 Hits@1 0.25 Hits@3 0.25 Hits@10 0.75 MRR 0.35416666666666663 rank 0 total_num 3 1429
checkcorrect (9752, 2, 7279) (9752, 2, 7279) real score 0.704738998413086 Hits@1 0.2 Hits@3 0.2 Hits@10 0.8 MRR 0.3055555555555555 rank 8 total_num 4 1429
checkcorrect (8071, 2, 5711) (8071, 2, 5711) real score 0.6707434564828872 Hits@1 0.16666666666666666 Hits@3 0.16666666666666666 Hits@10 0.8333333333333334 MRR 0.2962962962962963 rank 3 total_num 5 1429
checkco

checkcorrect (6621, 4, 4577) (6621, 4, 4577) real score 0.13498107641935347 Hits@1 0.1111111111111111 Hits@3 0.24444444444444444 Hits@10 0.5555555555555556 MRR 0.25040541131805405 rank 39 total_num 44 1429
checkcorrect (9275, 2, 10054) (9275, 2, 10054) real score 0.6295775234699249 Hits@1 0.10869565217391304 Hits@3 0.2391304347826087 Hits@10 0.5434782608695652 MRR 0.2464110907821543 rank 14 total_num 45 1429
checkcorrect (3991, 2, 3990) (3991, 2, 3990) real score 0.7001687407493591 Hits@1 0.10638297872340426 Hits@3 0.23404255319148937 Hits@10 0.5319148936170213 MRR 0.24294135126196664 rank 11 total_num 46 1429
checkcorrect (9550, 2, 4354) (9550, 2, 4354) real score 0.6470039546489715 Hits@1 0.10416666666666667 Hits@3 0.22916666666666666 Hits@10 0.5208333333333334 MRR 0.23910556330675412 rank 16 total_num 47 1429
checkcorrect (5734, 0, 9917) (5734, 0, 9917) real score 0.8032101899385452 Hits@1 0.12244897959183673 Hits@3 0.24489795918367346 Hits@10 0.5306122448979592 MRR 0.25463402119845

checkcorrect (10390, 4, 5892) (10390, 4, 5892) real score 0.48198080956935885 Hits@1 0.12790697674418605 Hits@3 0.2558139534883721 Hits@10 0.5 MRR 0.25651746148068294 rank 24 total_num 85 1429
checkcorrect (8877, 2, 5344) (8877, 2, 5344) real score 0.7354412257671357 Hits@1 0.13793103448275862 Hits@3 0.26436781609195403 Hits@10 0.5057471264367817 MRR 0.26506323778550267 rank 0 total_num 86 1429
checkcorrect (6572, 2, 9885) (6572, 2, 9885) real score 0.6535520732402802 Hits@1 0.13636363636363635 Hits@3 0.26136363636363635 Hits@10 0.5 MRR 0.2628628438496285 rank 13 total_num 87 1429
checkcorrect (4147, 4, 4457) (4147, 4, 4457) real score 0.5925023019313812 Hits@1 0.1348314606741573 Hits@3 0.25842696629213485 Hits@10 0.5056179775280899 MRR 0.26151446518679156 rank 6 total_num 88 1429
checkcorrect (10788, 4, 10428) (10788, 4, 10428) real score 0.6791635990142822 Hits@1 0.13333333333333333 Hits@3 0.26666666666666666 Hits@10 0.5111111111111111 MRR 0.26416430446249384 rank 1 total_num 89 1429

checkcorrect (7596, 2, 4073) (7596, 2, 4073) real score 0.5430383712053299 Hits@1 0.11023622047244094 Hits@3 0.2440944881889764 Hits@10 0.5118110236220472 MRR 0.24098088768408735 rank 19 total_num 126 1429
checkcorrect (7853, 2, 6543) (7853, 2, 6543) real score 0.6825966894626617 Hits@1 0.109375 Hits@3 0.25 Hits@10 0.515625 MRR 0.2430044744990554 rank 1 total_num 127 1429
checkcorrect (8266, 8, 4309) (8266, 8, 4309) real score 0.9362723469734192 Hits@1 0.10852713178294573 Hits@3 0.2558139534883721 Hits@10 0.5193798449612403 MRR 0.24499668787503173 rank 1 total_num 128 1429
checkcorrect (8854, 2, 7772) (8854, 2, 7772) real score 0.4339876502752304 Hits@1 0.1076923076923077 Hits@3 0.25384615384615383 Hits@10 0.5153846153846153 MRR 0.24336023692611708 rank 30 total_num 129 1429
checkcorrect (5293, 2, 5084) (5293, 2, 5084) real score 0.19599015936255454 Hits@1 0.10687022900763359 Hits@3 0.25190839694656486 Hits@10 0.5114503816793893 MRR 0.24176575235890008 rank 28 total_num 130 1429
checkc

checkcorrect (6324, 2, 6323) (6324, 2, 6323) real score 0.7366640031337738 Hits@1 0.10714285714285714 Hits@3 0.25 Hits@10 0.5238095238095238 MRR 0.24051394358611575 rank 5 total_num 167 1429
checkcorrect (10521, 2, 6496) (10521, 2, 6496) real score 0.6581735104322434 Hits@1 0.10650887573964497 Hits@3 0.2485207100591716 Hits@10 0.5207100591715976 MRR 0.23954595029225165 rank 12 total_num 168 1429
checkcorrect (4862, 2, 5601) (4862, 2, 5601) real score 0.5908470302820206 Hits@1 0.10588235294117647 Hits@3 0.24705882352941178 Hits@10 0.5176470588235295 MRR 0.23843097411406192 rank 19 total_num 169 1429
checkcorrect (6880, 2, 5789) (6880, 2, 5789) real score 0.6827942550182342 Hits@1 0.10526315789473684 Hits@3 0.24561403508771928 Hits@10 0.52046783625731 MRR 0.2384986292362019 rank 3 total_num 170 1429
checkcorrect (10314, 4, 6400) (10314, 4, 6400) real score 0.5612967997789383 Hits@1 0.10465116279069768 Hits@3 0.2441860465116279 Hits@10 0.5174418604651163 MRR 0.23745400656280402 rank 16 to

checkcorrect (7268, 2, 8858) (7268, 2, 8858) real score 0.6950631976127625 Hits@1 0.10047846889952153 Hits@3 0.24401913875598086 Hits@10 0.5167464114832536 MRR 0.23586734134644763 rank 1 total_num 208 1429
checkcorrect (10828, 16, 7770) (10828, 16, 7770) real score -0.15670942291617393 Hits@1 0.1 Hits@3 0.24285714285714285 Hits@10 0.5142857142857142 MRR 0.23500871379506244 rank 17 total_num 209 1429
checkcorrect (4781, 2, 10700) (4781, 2, 10700) real score 0.699924212694168 Hits@1 0.0995260663507109 Hits@3 0.24170616113744076 Hits@10 0.5165876777251185 MRR 0.23484279572020433 rank 4 total_num 210 1429
checkcorrect (10455, 2, 6246) (10455, 2, 6246) real score 0.71942840218544 Hits@1 0.09905660377358491 Hits@3 0.24056603773584906 Hits@10 0.5188679245283019 MRR 0.23467844291020337 rank 4 total_num 211 1429
checkcorrect (5107, 2, 5106) (5107, 2, 5106) real score 0.7342933416366577 Hits@1 0.09859154929577464 Hits@3 0.24413145539906103 Hits@10 0.5211267605633803 MRR 0.235924084023301 rank 1 

checkcorrect (5762, 2, 4640) (5762, 2, 4640) real score 0.6995078384876251 Hits@1 0.088 Hits@3 0.224 Hits@10 0.512 MRR 0.22309587085835883 rank 4 total_num 249 1429
checkcorrect (10780, 2, 4141) (10780, 2, 4141) real score 0.6922295093536377 Hits@1 0.08764940239043825 Hits@3 0.22310756972111553 Hits@10 0.5139442231075697 MRR 0.2226054490621104 rank 9 total_num 250 1429
checkcorrect (10228, 2, 6880) (10228, 2, 6880) real score 0.6491049826145172 Hits@1 0.0873015873015873 Hits@3 0.2222222222222222 Hits@10 0.5119047619047619 MRR 0.22195552080952968 rank 16 total_num 251 1429
checkcorrect (4932, 8, 4931) (4932, 8, 4931) real score 0.5423807986546307 Hits@1 0.08695652173913043 Hits@3 0.22529644268774704 Hits@10 0.5138339920948617 MRR 0.22239574931752892 rank 2 total_num 252 1429
checkcorrect (7420, 4, 5327) (7420, 4, 5327) real score 0.3515753194689751 Hits@1 0.08661417322834646 Hits@3 0.22440944881889763 Hits@10 0.5118110236220472 MRR 0.22171702589501893 rank 19 total_num 253 1429
checkcor

checkcorrect (7326, 4, 4312) (7326, 4, 4312) real score 0.4941901475191116 Hits@1 0.0859106529209622 Hits@3 0.22336769759450173 Hits@10 0.4948453608247423 MRR 0.21883406334233074 rank 19 total_num 290 1429
checkcorrect (5944, 0, 8845) (5944, 0, 8845) real score 0.5597261346876621 Hits@1 0.08561643835616438 Hits@3 0.22602739726027396 Hits@10 0.4965753424657534 MRR 0.2197969603856789 rank 1 total_num 291 1429
checkcorrect (4693, 2, 4692) (4693, 2, 4692) real score 0.629128497838974 Hits@1 0.08532423208191127 Hits@3 0.22525597269624573 Hits@10 0.4948805460750853 MRR 0.21922643007360276 rank 18 total_num 292 1429
checkcorrect (10607, 2, 7699) (10607, 2, 7699) real score 0.6058221638202668 Hits@1 0.08503401360544217 Hits@3 0.22448979591836735 Hits@10 0.4931972789115646 MRR 0.2186933469781143 rank 15 total_num 293 1429
checkcorrect (6803, 2, 7283) (6803, 2, 7283) real score 0.6897732079029083 Hits@1 0.0847457627118644 Hits@3 0.22372881355932203 Hits@10 0.49491525423728816 MRR 0.2183757424120

checkcorrect (6888, 2, 7191) (6888, 2, 7191) real score 0.70221706032753 Hits@1 0.08734939759036145 Hits@3 0.2289156626506024 Hits@10 0.5030120481927711 MRR 0.22101038788774136 rank 8 total_num 331 1429
checkcorrect (9128, 4, 6434) (9128, 4, 6434) real score 0.6078658282756806 Hits@1 0.08708708708708708 Hits@3 0.22822822822822822 Hits@10 0.5015015015015015 MRR 0.22047181815434475 rank 23 total_num 332 1429
checkcorrect (10440, 4, 4017) (10440, 4, 4017) real score 0.33914627581834794 Hits@1 0.08682634730538923 Hits@3 0.2275449101796407 Hits@10 0.5 MRR 0.21994781434386632 rank 21 total_num 333 1429
checkcorrect (10486, 2, 8602) (10486, 2, 8602) real score 0.627073860168457 Hits@1 0.08656716417910448 Hits@3 0.22686567164179106 Hits@10 0.49850746268656715 MRR 0.21946684632914362 rank 16 total_num 334 1429
checkcorrect (7143, 2, 9303) (7143, 2, 9303) real score 0.5857747972011567 Hits@1 0.08630952380952381 Hits@3 0.2261904761904762 Hits@10 0.49702380952380953 MRR 0.2189887412192705 rank 16 

checkcorrect (4362, 2, 6033) (4362, 2, 6033) real score 0.6574803531169892 Hits@1 0.08579088471849866 Hits@3 0.23056300268096513 Hits@10 0.5013404825737265 MRR 0.21939380928480498 rank 10 total_num 372 1429
checkcorrect (6729, 2, 6864) (6729, 2, 6864) real score 0.5917437553405762 Hits@1 0.0855614973262032 Hits@3 0.22994652406417113 Hits@10 0.5 MRR 0.21899818030657975 rank 13 total_num 373 1429
checkcorrect (5998, 2, 7755) (5998, 2, 7755) real score 0.40405691117048265 Hits@1 0.08533333333333333 Hits@3 0.22933333333333333 Hits@10 0.49866666666666665 MRR 0.21863640738131773 rank 11 total_num 374 1429
checkcorrect (9059, 8, 6650) (9059, 8, 6650) real score 0.665511180460453 Hits@1 0.0851063829787234 Hits@3 0.22872340425531915 Hits@10 0.5 MRR 0.21871982119147382 rank 3 total_num 375 1429
checkcorrect (10757, 2, 10690) (10757, 2, 10690) real score 0.6364290893077851 Hits@1 0.08488063660477453 Hits@3 0.22811671087533156 Hits@10 0.5013262599469496 MRR 0.21858174916355658 rank 5 total_num 376

checkcorrect (5733, 2, 10760) (5733, 2, 10760) real score 0.9216837644577026 Hits@1 0.07971014492753623 Hits@3 0.2246376811594203 Hits@10 0.5048309178743962 MRR 0.214813241962211 rank 0 total_num 413 1429
checkcorrect (8039, 2, 8495) (8039, 2, 8495) real score 0.7074452102184295 Hits@1 0.07951807228915662 Hits@3 0.22409638554216868 Hits@10 0.5060240963855421 MRR 0.21453658354784422 rank 9 total_num 414 1429
checkcorrect (4032, 4, 4740) (4032, 4, 4740) real score 0.632895576953888 Hits@1 0.07932692307692307 Hits@3 0.22355769230769232 Hits@10 0.5048076923076923 MRR 0.21416227332155557 rank 16 total_num 415 1429
checkcorrect (5554, 6, 8184) (5554, 6, 8184) real score 0.5541687846183777 Hits@1 0.07913669064748201 Hits@3 0.22302158273381295 Hits@10 0.5059952038369304 MRR 0.21412831103541277 rank 4 total_num 416 1429
checkcorrect (5125, 2, 9700) (5125, 2, 9700) real score 0.6357281267642975 Hits@1 0.07894736842105263 Hits@3 0.22488038277511962 Hits@10 0.507177033492823 MRR 0.214812214597529 

checkcorrect (4531, 2, 7620) (4531, 2, 7620) real score 0.6073150277137757 Hits@1 0.08131868131868132 Hits@3 0.21978021978021978 Hits@10 0.5054945054945055 MRR 0.2141827473373812 rank 14 total_num 454 1429
checkcorrect (7483, 2, 9863) (7483, 2, 9863) real score 0.6527796059846878 Hits@1 0.08114035087719298 Hits@3 0.21929824561403508 Hits@10 0.506578947368421 MRR 0.21395671304741135 rank 8 total_num 455 1429
checkcorrect (7775, 8, 4011) (7775, 8, 4011) real score 0.5824573516845704 Hits@1 0.08096280087527352 Hits@3 0.2210065645514223 Hits@10 0.5076586433260394 MRR 0.21458262833614786 rank 1 total_num 456 1429
checkcorrect (5343, 2, 8512) (5343, 2, 8512) real score 0.7064210057258606 Hits@1 0.08078602620087336 Hits@3 0.2205240174672489 Hits@10 0.5087336244541485 MRR 0.214387033077772 rank 7 total_num 457 1429
checkcorrect (5583, 2, 4348) (5583, 2, 4348) real score 0.5571934670209885 Hits@1 0.08061002178649238 Hits@3 0.22004357298474944 Hits@10 0.5076252723311547 MRR 0.21401898844242725 r

checkcorrect (4939, 2, 6344) (4939, 2, 6344) real score 0.6545372664928436 Hits@1 0.08484848484848485 Hits@3 0.2222222222222222 Hits@10 0.503030303030303 MRR 0.2162606357532624 rank 15 total_num 494 1429
checkcorrect (4272, 8, 8764) (4272, 8, 8764) real score 0.7654363602399826 Hits@1 0.0846774193548387 Hits@3 0.2217741935483871 Hits@10 0.5040322580645161 MRR 0.2163286586650502 rank 3 total_num 495 1429
checkcorrect (9316, 2, 7695) (9316, 2, 7695) real score 0.7349098503589631 Hits@1 0.08450704225352113 Hits@3 0.22132796780684105 Hits@10 0.5050301810865191 MRR 0.21618082865336424 rank 6 total_num 496 1429
checkcorrect (9361, 2, 8188) (9361, 2, 8188) real score 0.6889766156673431 Hits@1 0.08433734939759036 Hits@3 0.22088353413654618 Hits@10 0.5060240963855421 MRR 0.21608140262527853 rank 5 total_num 497 1429
checkcorrect (6541, 2, 8146) (6541, 2, 8146) real score 0.6445994466543198 Hits@1 0.0841683366733467 Hits@3 0.22044088176352705 Hits@10 0.5050100200400801 MRR 0.2157438027154464 ran

checkcorrect (8563, 4, 5618) (8563, 4, 5618) real score 0.5724386274814606 Hits@1 0.08582089552238806 Hits@3 0.22201492537313433 Hits@10 0.5074626865671642 MRR 0.2178809696743734 rank 8 total_num 535 1429
checkcorrect (4558, 2, 9470) (4558, 2, 9470) real score 0.7160667955875397 Hits@1 0.0856610800744879 Hits@3 0.22160148975791433 Hits@10 0.5083798882681564 MRR 0.21778559853283205 rank 5 total_num 536 1429
checkcorrect (5741, 6, 5545) (5741, 6, 5545) real score 0.8499943494796753 Hits@1 0.08550185873605948 Hits@3 0.22304832713754646 Hits@10 0.5092936802973977 MRR 0.2183101606173435 rank 1 total_num 537 1429
checkcorrect (7034, 2, 5339) (7034, 2, 5339) real score 0.637724655866623 Hits@1 0.08534322820037106 Hits@3 0.22263450834879406 Hits@10 0.5083487940630798 MRR 0.21804784691846732 rank 12 total_num 538 1429
checkcorrect (4020, 2, 6069) (4020, 2, 6069) real score 0.6947329759597778 Hits@1 0.08518518518518518 Hits@3 0.22407407407407406 Hits@10 0.5092592592592593 MRR 0.21856998053528498

checkcorrect (6432, 2, 6109) (6432, 2, 6109) real score 0.5953630536794663 Hits@1 0.08854166666666667 Hits@3 0.22916666666666666 Hits@10 0.5086805555555556 MRR 0.2213440475055131 rank 14 total_num 575 1429
checkcorrect (4821, 0, 7250) (4821, 0, 7250) real score 0.051653106114827096 Hits@1 0.08838821490467938 Hits@3 0.22876949740034663 Hits@10 0.5077989601386482 MRR 0.22109375119601146 rank 12 total_num 576 1429
checkcorrect (3862, 2, 5214) (3862, 2, 5214) real score 0.7344509482383728 Hits@1 0.08996539792387544 Hits@3 0.2301038062283737 Hits@10 0.5086505190311419 MRR 0.22244133986176232 rank 0 total_num 577 1429
checkcorrect (4145, 4, 9437) (4145, 4, 9437) real score 0.5183905690908432 Hits@1 0.08981001727115717 Hits@3 0.229706390328152 Hits@10 0.5077720207253886 MRR 0.222180523335971 rank 13 total_num 578 1429
checkcorrect (6851, 4, 3863) (6851, 4, 3863) real score 0.2808252349495888 Hits@1 0.0896551724137931 Hits@3 0.2293103448275862 Hits@10 0.506896551724138 MRR 0.22188366036470208 

checkcorrect (8340, 2, 8330) (8340, 2, 8330) real score 0.5999452322721481 Hits@1 0.09090909090909091 Hits@3 0.2305194805194805 Hits@10 0.5097402597402597 MRR 0.22225352753059271 rank 9 total_num 615 1429
checkcorrect (10519, 4, 4632) (10519, 4, 4632) real score 0.3412176840007305 Hits@1 0.09076175040518639 Hits@3 0.23014586709886548 Hits@10 0.5089141004862237 MRR 0.22199460771287702 rank 15 total_num 616 1429
checkcorrect (8333, 2, 10944) (8333, 2, 10944) real score 0.7056960999965668 Hits@1 0.09061488673139159 Hits@3 0.2297734627831715 Hits@10 0.5097087378640777 MRR 0.2218376585094581 rank 7 total_num 617 1429
checkcorrect (9531, 4, 5725) (9531, 4, 5725) real score 0.2173534855246544 Hits@1 0.09046849757673667 Hits@3 0.2294022617124394 Hits@10 0.5088852988691438 MRR 0.2215168477751189 rank 42 total_num 618 1429
checkcorrect (10118, 4, 4420) (10118, 4, 4420) real score 0.5339395835995674 Hits@1 0.09032258064516129 Hits@3 0.22903225806451613 Hits@10 0.5080645161290323 MRR 0.22130619010

checkcorrect (6556, 2, 10893) (6556, 2, 10893) real score 0.6397948950529099 Hits@1 0.0882800608828006 Hits@3 0.2313546423135464 Hits@10 0.5038051750380518 MRR 0.22070204168278654 rank 1 total_num 656 1429
checkcorrect (4661, 2, 7732) (4661, 2, 7732) real score 0.7798953831195832 Hits@1 0.08966565349544073 Hits@3 0.23252279635258358 Hits@10 0.5045592705167173 MRR 0.22188638508448444 rank 0 total_num 657 1429
checkcorrect (9595, 2, 4416) (9595, 2, 4416) real score 0.5765967905521393 Hits@1 0.08952959028831563 Hits@3 0.2321699544764795 Hits@10 0.503793626707132 MRR 0.22160387810516696 rank 27 total_num 658 1429
checkcorrect (4562, 2, 5893) (4562, 2, 5893) real score 0.669210895895958 Hits@1 0.0893939393939394 Hits@3 0.2318181818181818 Hits@10 0.503030303030303 MRR 0.2213763397617176 rank 13 total_num 659 1429
checkcorrect (10753, 4, 5555) (10753, 4, 5555) real score 0.7730322569608689 Hits@1 0.08925869894099848 Hits@3 0.2329803328290469 Hits@10 0.5037821482602118 MRR 0.22179785815844724 

checkcorrect (4872, 2, 9588) (4872, 2, 9588) real score 0.7429409176111221 Hits@1 0.08751793400286945 Hits@3 0.2309899569583931 Hits@10 0.503586800573888 MRR 0.22024318128120593 rank 1 total_num 696 1429
checkcorrect (5653, 2, 8309) (5653, 2, 8309) real score 0.6237332880496979 Hits@1 0.08739255014326648 Hits@3 0.23209169054441262 Hits@10 0.504297994269341 MRR 0.22040520155635226 rank 2 total_num 697 1429
checkcorrect (9923, 2, 5754) (9923, 2, 5754) real score 0.6348088324069977 Hits@1 0.08726752503576538 Hits@3 0.2317596566523605 Hits@10 0.5035765379113019 MRR 0.22020910446304323 rank 11 total_num 698 1429
checkcorrect (8287, 4, 4981) (8287, 4, 4981) real score 0.0 Hits@1 0.08714285714285715 Hits@3 0.23142857142857143 Hits@10 0.5028571428571429 MRR 0.21993211401305843 rank 37 total_num 699 1429
checkcorrect (10630, 2, 6849) (10630, 2, 6849) real score 0.6452685952186584 Hits@1 0.08701854493580599 Hits@3 0.23109843081312412 Hits@10 0.5021398002853067 MRR 0.2197281068274807 rank 12 tota

checkcorrect (9330, 4, 9181) (9330, 4, 9181) real score 0.3445544376969337 Hits@1 0.08955223880597014 Hits@3 0.23337856173677068 Hits@10 0.5061058344640434 MRR 0.2220513412453701 rank 20 total_num 736 1429
checkcorrect (6494, 4, 4144) (6494, 4, 4144) real score 0.5725152105093002 Hits@1 0.08943089430894309 Hits@3 0.23306233062330622 Hits@10 0.505420054200542 MRR 0.22187364171916918 rank 10 total_num 737 1429
checkcorrect (6783, 2, 6344) (6783, 2, 6344) real score 0.574674516916275 Hits@1 0.08930987821380243 Hits@3 0.2327469553450609 Hits@10 0.5047361299052774 MRR 0.22164106574931916 rank 19 total_num 738 1429
checkcorrect (8789, 2, 4628) (8789, 2, 4628) real score 0.5928609564900398 Hits@1 0.0891891891891892 Hits@3 0.23243243243243245 Hits@10 0.504054054054054 MRR 0.22139785710191015 rank 23 total_num 739 1429
checkcorrect (8090, 6, 8862) (8090, 6, 8862) real score 0.4225111424922943 Hits@1 0.08906882591093117 Hits@3 0.2321187584345479 Hits@10 0.50472334682861 MRR 0.22123402733524092 r

checkcorrect (7019, 2, 8520) (7019, 2, 8520) real score 0.651522883400321 Hits@1 0.09137709137709138 Hits@3 0.23294723294723294 Hits@10 0.510939510939511 MRR 0.22323745341501972 rank 0 total_num 776 1429
checkcorrect (7973, 2, 5378) (7973, 2, 5378) real score 0.6171260118484497 Hits@1 0.09125964010282776 Hits@3 0.2326478149100257 Hits@10 0.5102827763496144 MRR 0.22299336071568593 rank 29 total_num 777 1429
checkcorrect (8278, 2, 7512) (8278, 2, 7512) real score 0.557100248336792 Hits@1 0.09114249037227215 Hits@3 0.23234916559691912 Hits@10 0.5109114249037228 MRR 0.22284973780220124 rank 8 total_num 778 1429
checkcorrect (8248, 2, 8247) (8248, 2, 8247) real score 0.7075566947460175 Hits@1 0.09230769230769231 Hits@3 0.23333333333333334 Hits@10 0.5115384615384615 MRR 0.22384608429219843 rank 0 total_num 779 1429
checkcorrect (7683, 2, 7832) (7683, 2, 7832) real score 0.719734913110733 Hits@1 0.09218950064020487 Hits@3 0.2330345710627401 Hits@10 0.5121638924455826 MRR 0.22377287120945125 r

checkcorrect (6169, 2, 10389) (6169, 2, 10389) real score 0.9137125849723816 Hits@1 0.09046454767726161 Hits@3 0.2310513447432763 Hits@10 0.511002444987775 MRR 0.22231904181647733 rank 0 total_num 817 1429
checkcorrect (6672, 2, 4745) (6672, 2, 4745) real score 0.5936557233333588 Hits@1 0.09035409035409035 Hits@3 0.23076923076923078 Hits@10 0.5103785103785103 MRR 0.22210864005601766 rank 19 total_num 818 1429
checkcorrect (5051, 4, 9841) (5051, 4, 9841) real score 0.33471059501171113 Hits@1 0.09024390243902439 Hits@3 0.2304878048780488 Hits@10 0.5097560975609756 MRR 0.22193158449122138 rank 12 total_num 819 1429
checkcorrect (6069, 2, 4994) (6069, 2, 4994) real score 0.7289768785238266 Hits@1 0.09135200974421437 Hits@3 0.23142509135200975 Hits@10 0.510353227771011 MRR 0.22287929267089102 rank 0 total_num 820 1429
checkcorrect (4175, 2, 5385) (4175, 2, 5385) real score 0.7275012999773025 Hits@1 0.09124087591240876 Hits@3 0.23236009732360097 Hits@10 0.5109489051094891 MRR 0.2232164224851

checkcorrect (7550, 2, 4397) (7550, 2, 4397) real score 0.7505024611949921 Hits@1 0.08974358974358974 Hits@3 0.22727272727272727 Hits@10 0.5081585081585082 MRR 0.22101610411115774 rank 3 total_num 857 1429
checkcorrect (4352, 2, 6674) (4352, 2, 6674) real score 0.6455700397491455 Hits@1 0.08963911525029103 Hits@3 0.2270081490104773 Hits@10 0.5075669383003493 MRR 0.22085582149092745 rank 11 total_num 858 1429
checkcorrect (4018, 2, 8373) (4018, 2, 8373) real score 0.8502394586801529 Hits@1 0.09069767441860466 Hits@3 0.22790697674418606 Hits@10 0.5081395348837209 MRR 0.221761803093845 rank 0 total_num 859 1429
checkcorrect (4178, 2, 9698) (4178, 2, 9698) real score 0.6528841435909272 Hits@1 0.09059233449477352 Hits@3 0.22764227642276422 Hits@10 0.5075493612078978 MRR 0.22158720003732318 rank 13 total_num 860 1429
checkcorrect (5698, 4, 3961) (5698, 4, 3961) real score 0.3143123000860214 Hits@1 0.09048723897911833 Hits@3 0.2273781902552204 Hits@10 0.5069605568445475 MRR 0.2214193762286059

checkcorrect (7994, 2, 6532) (7994, 2, 6532) real score 0.37446613013744356 Hits@1 0.08908685968819599 Hits@3 0.22828507795100222 Hits@10 0.5055679287305123 MRR 0.22054062307416464 rank 33 total_num 897 1429
checkcorrect (3983, 4, 6542) (3983, 4, 6542) real score 0.4782349795103073 Hits@1 0.08898776418242492 Hits@3 0.22803114571746386 Hits@10 0.5050055617352615 MRR 0.22036073754172594 rank 16 total_num 898 1429
checkcorrect (8370, 2, 8303) (8370, 2, 8303) real score 0.6837692022323608 Hits@1 0.08888888888888889 Hits@3 0.2288888888888889 Hits@10 0.5055555555555555 MRR 0.22067144783334625 rank 1 total_num 899 1429
checkcorrect (5382, 2, 8969) (5382, 2, 8969) real score 0.5839632004499435 Hits@1 0.08879023307436182 Hits@3 0.2286348501664817 Hits@10 0.5049944506104328 MRR 0.22050580646108792 rank 13 total_num 900 1429
checkcorrect (8968, 2, 10424) (8968, 2, 10424) real score 0.5201440110802651 Hits@1 0.08869179600886919 Hits@3 0.22838137472283815 Hits@10 0.5055432372505543 MRR 0.2203722080

checkcorrect (7451, 2, 4478) (7451, 2, 4478) real score 0.39137501567602156 Hits@1 0.08955223880597014 Hits@3 0.23134328358208955 Hits@10 0.5063965884861408 MRR 0.22219004756968969 rank 20 total_num 937 1429
checkcorrect (9917, 0, 5354) (9917, 0, 5354) real score 0.9476903557777405 Hits@1 0.09052183173588925 Hits@3 0.2321618743343983 Hits@10 0.5069222577209798 MRR 0.2230183861771767 rank 0 total_num 938 1429
checkcorrect (10426, 2, 6776) (10426, 2, 6776) real score 0.6452031105756759 Hits@1 0.09042553191489362 Hits@3 0.23191489361702128 Hits@10 0.5063829787234042 MRR 0.22283712361629393 rank 18 total_num 939 1429
checkcorrect (5363, 2, 4477) (5363, 2, 4477) real score 0.5408937409520149 Hits@1 0.09032943676939426 Hits@3 0.2316684378320935 Hits@10 0.5058448459086079 MRR 0.22266282649174077 rank 16 total_num 940 1429
checkcorrect (9694, 2, 6795) (9694, 2, 6795) real score 0.5120452627539634 Hits@1 0.09023354564755838 Hits@3 0.23142250530785563 Hits@10 0.505307855626327 MRR 0.222470686194

checkcorrect (9245, 4, 9042) (9245, 4, 9042) real score 0.4495934799313545 Hits@1 0.09100204498977506 Hits@3 0.22903885480572597 Hits@10 0.5030674846625767 MRR 0.22187349100826437 rank 24 total_num 977 1429
checkcorrect (5703, 4, 5371) (5703, 4, 5371) real score 0.4156796246767044 Hits@1 0.09090909090909091 Hits@3 0.22880490296220635 Hits@10 0.5035750766087844 MRR 0.2217603527244062 rank 8 total_num 978 1429
checkcorrect (4110, 2, 10660) (4110, 2, 10660) real score 0.6943950831890107 Hits@1 0.09081632653061225 Hits@3 0.22857142857142856 Hits@10 0.5040816326530613 MRR 0.22178916869101395 rank 3 total_num 979 1429
checkcorrect (4960, 2, 4577) (4960, 2, 4577) real score 0.6966991126537323 Hits@1 0.09072375127421 Hits@3 0.22833843017329256 Hits@10 0.5035677879714577 MRR 0.22165575372895288 rank 10 total_num 980 1429
checkcorrect (5535, 4, 10861) (5535, 4, 10861) real score 0.4057989776134491 Hits@1 0.09063136456211812 Hits@3 0.22810590631364563 Hits@10 0.5030549898167006 MRR 0.221502772891

checkcorrect (6737, 4, 5821) (6737, 4, 5821) real score 0.3798038728535175 Hits@1 0.09332023575638507 Hits@3 0.2337917485265226 Hits@10 0.5068762278978389 MRR 0.22459939899901427 rank 14 total_num 1017 1429
checkcorrect (7748, 2, 7747) (7748, 2, 7747) real score 0.738845306634903 Hits@1 0.09322865554465162 Hits@3 0.23454367026496564 Hits@10 0.507360157016683 MRR 0.2247061055096466 rank 2 total_num 1018 1429
checkcorrect (10246, 4, 10066) (10246, 4, 10066) real score 0.0 Hits@1 0.09313725490196079 Hits@3 0.23431372549019608 Hits@10 0.5068627450980392 MRR 0.22451094366663812 rank 38 total_num 1019 1429
checkcorrect (5363, 2, 9659) (5363, 2, 9659) real score 0.6985831975936889 Hits@1 0.0930460333006856 Hits@3 0.23506366307541626 Hits@10 0.5073457394711067 MRR 0.22478076644463357 rank 1 total_num 1020 1429
checkcorrect (8780, 4, 4026) (8780, 4, 4026) real score 0.2870675951242447 Hits@1 0.09295499021526418 Hits@3 0.23483365949119372 Hits@10 0.5068493150684932 MRR 0.22459996334635116 rank 2

checkcorrect (4336, 4, 7833) (4336, 4, 7833) real score 0.2952104642987251 Hits@1 0.09546313799621928 Hits@3 0.24196597353497165 Hits@10 0.5113421550094518 MRR 0.2284904812428123 rank 29 total_num 1057 1429
checkcorrect (7436, 4, 10536) (7436, 4, 10536) real score 0.7527638614177704 Hits@1 0.09537299338999056 Hits@3 0.24173748819641172 Hits@10 0.5108593012275732 MRR 0.22834735810370016 rank 12 total_num 1058 1429
checkcorrect (9501, 2, 4897) (9501, 2, 4897) real score 0.3908513031899929 Hits@1 0.09528301886792453 Hits@3 0.24150943396226415 Hits@10 0.5103773584905661 MRR 0.2281605238321901 rank 32 total_num 1059 1429
checkcorrect (8040, 2, 10658) (8040, 2, 10658) real score 0.5101608008146286 Hits@1 0.09519321394910461 Hits@3 0.2412818096135721 Hits@10 0.5098963242224317 MRR 0.2279950865608566 rank 18 total_num 1060 1429
checkcorrect (10416, 2, 6256) (10416, 2, 6256) real score 0.7221141219139099 Hits@1 0.0951035781544256 Hits@3 0.24105461393596986 Hits@10 0.5103578154425612 MRR 0.22801

checkcorrect (9389, 8, 6065) (9389, 8, 6065) real score 0.5356446746736765 Hits@1 0.09380692167577413 Hits@3 0.2395264116575592 Hits@10 0.5091074681238615 MRR 0.2269712292573596 rank 0 total_num 1097 1429
checkcorrect (6359, 2, 8117) (6359, 2, 8117) real score 0.746324610710144 Hits@1 0.09372156505914468 Hits@3 0.23930846223839855 Hits@10 0.5095541401273885 MRR 0.2269921835528488 rank 3 total_num 1098 1429
checkcorrect (5809, 4, 5971) (5809, 4, 5971) real score 0.40788400918245316 Hits@1 0.09363636363636364 Hits@3 0.2390909090909091 Hits@10 0.509090909090909 MRR 0.22682714933639578 rank 21 total_num 1099 1429
checkcorrect (9503, 2, 9916) (9503, 2, 9916) real score 0.7331463098526001 Hits@1 0.09355131698455948 Hits@3 0.23978201634877383 Hits@10 0.5095367847411444 MRR 0.2269238851983367 rank 2 total_num 1100 1429
checkcorrect (8430, 2, 4643) (8430, 2, 4643) real score 0.6560462534427642 Hits@1 0.09346642468239565 Hits@3 0.2395644283121597 Hits@10 0.5090744101633394 MRR 0.2267935852420163

checkcorrect (8618, 2, 6905) (8618, 2, 6905) real score 0.6986788094043732 Hits@1 0.09306409130816505 Hits@3 0.2405618964003512 Hits@10 0.5136084284460053 MRR 0.22761354029776898 rank 7 total_num 1138 1429
checkcorrect (7724, 2, 6021) (7724, 2, 6021) real score 0.725216680765152 Hits@1 0.09298245614035087 Hits@3 0.2412280701754386 Hits@10 0.5140350877192983 MRR 0.2277062769583265 rank 2 total_num 1139 1429
checkcorrect (5947, 2, 4997) (5947, 2, 4997) real score 0.7471145361661911 Hits@1 0.09290096406660824 Hits@3 0.24189307624890447 Hits@10 0.5144609991235758 MRR 0.22779885106557887 rank 2 total_num 1140 1429
checkcorrect (8411, 2, 7467) (8411, 2, 7467) real score 0.7137566447257996 Hits@1 0.09281961471103327 Hits@3 0.2425569176882662 Hits@10 0.5148861646234676 MRR 0.22789126304654886 rank 2 total_num 1141 1429
checkcorrect (5701, 2, 7127) (5701, 2, 7127) real score 0.7140806317329407 Hits@1 0.09273840769903761 Hits@3 0.24234470691163604 Hits@10 0.5144356955380578 MRR 0.227771418626481

checkcorrect (7897, 4, 10158) (7897, 4, 10158) real score 0.49404591172933576 Hits@1 0.09245122985581 Hits@3 0.24173027989821882 Hits@10 0.5131467345207803 MRR 0.22720258203000884 rank 8 total_num 1178 1429
checkcorrect (8669, 2, 8668) (8669, 2, 8668) real score 0.0 Hits@1 0.0923728813559322 Hits@3 0.24152542372881355 Hits@10 0.5127118644067796 MRR 0.22704688345275423 rank 22 total_num 1179 1429
checkcorrect (6918, 2, 9299) (6918, 2, 9299) real score 0.6110413432121277 Hits@1 0.09229466553767993 Hits@3 0.24132091447925486 Hits@10 0.5122777307366638 MRR 0.22691511519532476 rank 13 total_num 1180 1429
checkcorrect (10874, 4, 6068) (10874, 4, 6068) real score 0.8933352887630462 Hits@1 0.09306260575296109 Hits@3 0.24196277495769883 Hits@10 0.5126903553299492 MRR 0.22756916332121704 rank 0 total_num 1181 1429
checkcorrect (7907, 4, 5695) (7907, 4, 5695) real score 0.394451991468668 Hits@1 0.09298393913778528 Hits@3 0.24175824175824176 Hits@10 0.5122569737954353 MRR 0.22740321305636393 rank 

checkcorrect (7336, 2, 10002) (7336, 2, 10002) real score 0.7628233134746552 Hits@1 0.09269893355209188 Hits@3 0.24446267432321575 Hits@10 0.5151763740771124 MRR 0.22862792634693566 rank 1 total_num 1218 1429
checkcorrect (9833, 4, 8245) (9833, 4, 8245) real score 0.0 Hits@1 0.09262295081967213 Hits@3 0.2442622950819672 Hits@10 0.5147540983606558 MRR 0.22845958854989185 rank 42 total_num 1219 1429
checkcorrect (9299, 2, 6918) (9299, 2, 6918) real score 0.6169927418231964 Hits@1 0.09254709254709255 Hits@3 0.24406224406224405 Hits@10 0.5143325143325144 MRR 0.2283097072697155 rank 21 total_num 1220 1429
checkcorrect (7328, 2, 6956) (7328, 2, 6956) real score 0.6382450133562088 Hits@1 0.09247135842880523 Hits@3 0.24386252045826515 Hits@10 0.513911620294599 MRR 0.22816594448058097 rank 18 total_num 1221 1429
checkcorrect (6129, 2, 5571) (6129, 2, 5571) real score 0.7330211937427521 Hits@1 0.09321340964840556 Hits@3 0.24448078495502862 Hits@10 0.5143090760425184 MRR 0.22879704346301713 rank 

checkcorrect (4049, 6, 10567) (4049, 6, 10567) real score 0.7744194626808166 Hits@1 0.09213661636219221 Hits@3 0.24384432088959493 Hits@10 0.5138999205718825 MRR 0.228025200404693 rank 5 total_num 1258 1429
checkcorrect (4771, 0, 8448) (4771, 0, 8448) real score 0.10710040926933288 Hits@1 0.09206349206349207 Hits@3 0.24365079365079365 Hits@10 0.5134920634920634 MRR 0.2279163780955695 rank 10 total_num 1259 1429
checkcorrect (8875, 2, 4898) (8875, 2, 4898) real score 0.666339322924614 Hits@1 0.09199048374306107 Hits@3 0.24345757335448057 Hits@10 0.5138778747026169 MRR 0.2279338908805849 rank 3 total_num 1260 1429
checkcorrect (3939, 2, 5453) (3939, 2, 5453) real score 0.7605455338954925 Hits@1 0.0919175911251981 Hits@3 0.24405705229793978 Hits@10 0.5142630744849446 MRR 0.2281494741683182 rank 1 total_num 1261 1429
checkcorrect (10552, 4, 7949) (10552, 4, 7949) real score 0.8780369877815246 Hits@1 0.09184481393507522 Hits@3 0.24465558194774348 Hits@10 0.5146476642913698 MRR 0.22836471607

checkcorrect (4289, 2, 4327) (4289, 2, 4327) real score 0.6782160639762879 Hits@1 0.09160892994611239 Hits@3 0.24403387220939185 Hits@10 0.5173210161662818 MRR 0.2287501954939034 rank 5 total_num 1298 1429
checkcorrect (8241, 2, 8240) (8241, 2, 8240) real score 0.7395528078079223 Hits@1 0.09153846153846154 Hits@3 0.24384615384615385 Hits@10 0.5176923076923077 MRR 0.22868412391495208 rank 6 total_num 1299 1429
checkcorrect (4310, 2, 6195) (4310, 2, 6195) real score 0.6044537723064423 Hits@1 0.09146810146041506 Hits@3 0.2436587240584166 Hits@10 0.5172943889315911 MRR 0.22855105045733531 rank 17 total_num 1300 1429
checkcorrect (8080, 4, 6009) (8080, 4, 6009) real score 0.7002768278121948 Hits@1 0.0913978494623656 Hits@3 0.2434715821812596 Hits@10 0.5176651305683564 MRR 0.22847151816051708 rank 7 total_num 1301 1429
checkcorrect (4229, 4, 4549) (4229, 4, 4549) real score 0.445256832242012 Hits@1 0.09132770529547199 Hits@3 0.24328472755180353 Hits@10 0.5172678434382195 MRR 0.22836013045151

checkcorrect (4146, 2, 7624) (4146, 2, 7624) real score 0.5715389430522919 Hits@1 0.09185959671396565 Hits@3 0.24421209858103063 Hits@10 0.5175504107542942 MRR 0.22879144415157618 rank 13 total_num 1338 1429
checkcorrect (4155, 2, 8485) (4155, 2, 8485) real score 0.5351554471999407 Hits@1 0.09179104477611941 Hits@3 0.24402985074626865 Hits@10 0.5171641791044777 MRR 0.22866216363769853 rank 17 total_num 1339 1429
checkcorrect (7104, 2, 8712) (7104, 2, 8712) real score 0.7172119498252869 Hits@1 0.09172259507829977 Hits@3 0.24384787472035793 Hits@10 0.517524235645041 MRR 0.2285981777907332 rank 6 total_num 1340 1429
checkcorrect (7492, 4, 5407) (7492, 4, 5407) real score 0.7157328486442566 Hits@1 0.09165424739195231 Hits@3 0.2436661698956781 Hits@10 0.5178837555886736 MRR 0.22855202912372571 rank 5 total_num 1341 1429
checkcorrect (5195, 6, 6773) (5195, 6, 6773) real score 0.4216143973171711 Hits@1 0.09158600148920328 Hits@3 0.24348473566641846 Hits@10 0.5182427401340283 MRR 0.22853076923

checkcorrect (8303, 2, 8871) (8303, 2, 8871) real score 0.5281083792448044 Hits@1 0.09137055837563451 Hits@3 0.242929659173314 Hits@10 0.5155910079767948 MRR 0.22782551599954895 rank 24 total_num 1378 1429
checkcorrect (5349, 2, 5348) (5349, 2, 5348) real score 0.6371071636676788 Hits@1 0.09130434782608696 Hits@3 0.2427536231884058 Hits@10 0.5152173913043478 MRR 0.22769856387125026 rank 18 total_num 1379 1429
checkcorrect (9416, 8, 7216) (9416, 8, 7216) real score 0.6353716000914573 Hits@1 0.09123823316437364 Hits@3 0.2433019551049964 Hits@10 0.5155684286748733 MRR 0.2277750553770157 rank 2 total_num 1380 1429
checkcorrect (8035, 4, 10140) (8035, 4, 10140) real score 0.6932038009166718 Hits@1 0.09117221418234443 Hits@3 0.24312590448625182 Hits@10 0.515918958031838 MRR 0.2276906386300794 rank 8 total_num 1381 1429
checkcorrect (6392, 2, 8676) (6392, 2, 8676) real score 0.7151348471641541 Hits@1 0.0911062906724512 Hits@3 0.24295010845986983 Hits@10 0.5162689804772235 MRR 0.22762929843067

checkcorrect (6337, 4, 7629) (6337, 4, 7629) real score 0.6121881663799286 Hits@1 0.0923185341789993 Hits@3 0.2459478505990134 Hits@10 0.5200845665961945 MRR 0.2295552872711329 rank 6 total_num 1418 1429
checkcorrect (7152, 4, 3960) (7152, 4, 3960) real score 0.2718981817364693 Hits@1 0.09225352112676057 Hits@3 0.24577464788732395 Hits@10 0.5197183098591549 MRR 0.22941496878031026 rank 32 total_num 1419 1429
checkcorrect (10384, 4, 6155) (10384, 4, 6155) real score 0.4668690234422684 Hits@1 0.09218859957776214 Hits@3 0.24560168895144266 Hits@10 0.5193525686136523 MRR 0.22927865584359947 rank 27 total_num 1420 1429
checkcorrect (5111, 2, 9881) (5111, 2, 9881) real score 0.6477426081895828 Hits@1 0.09212376933895922 Hits@3 0.24542897327707455 Hits@10 0.519690576652602 MRR 0.22918774258351257 rank 9 total_num 1421 1429
checkcorrect (9256, 4, 9655) (9256, 4, 9655) real score 0.2172418087720871 Hits@1 0.09205903021784961 Hits@3 0.24525650035137034 Hits@10 0.5193253689388616 MRR 0.2290462036

#### Fine tuned

In [34]:
#function to build the big batches for path-based training
def build_big_batches_path(lower_bd, upper_bd, data, one_hop, s_t_r,
                      x_p_list, x_r_list, y_list,
                      relation2id, entity2id, id2relation, id2entity):
    """Append positive/negative path-triple samples to the caller's lists.

    Every key ``s`` of ``one_hop`` is visited in shuffled order. The paths
    returned by ``Class_2.obtain_paths('direct_neighbour', ...)`` for ``s``
    are scanned twice. A target ``t`` is used when it has at least three
    paths, at least one even-ID relation in ``s_t_r[(s, t)]``, and fewer
    even-ID relations than exist in total. For such a target three paths
    are sampled and two samples are appended:

    * positive: the three padded paths, one of the existing even-ID
      relations, label ``1.``
    * negative: the same three padded paths, one even-ID relation that is
      not present between ``s`` and ``t``, label ``0.``

    Each path is extended with ``abs(len(path) - upper_bd)`` copies of the
    placeholder ID ``len(id2relation)``.

    Args:
        lower_bd: passed through to ``Class_2.obtain_paths``.
        upper_bd: passed through to ``Class_2.obtain_paths`` and used to
            compute the amount of padding.
        data: not used in this function.
        one_hop: iterated for the start entities and passed through to
            ``Class_2.obtain_paths``.
        s_t_r: indexable by ``(s, t)``, yielding the relation IDs between
            the two entities.
        x_p_list: indexable by ``'1'``, ``'2'`` and ``'3'``, each entry a
            list that is appended to in place.
        x_r_list: list receiving one ``[relation_id]`` per sample.
        y_list: list receiving the label of each sample.
        relation2id: not used in this function.
        entity2id: not used in this function.
        id2relation: must contain every integer key in
            ``range(len(id2relation))``.
        id2entity: only read to build the error raised for an empty
            ``s_t_r`` entry.

    Raises:
        ValueError: if ``id2relation`` has a gap in its integer keys, if
            the even IDs are not exactly half of all IDs, or if
            ``s_t_r[(s, t)]`` is empty for a returned target ``t``.
    """
    num_r = len(id2relation)

    #relation IDs are expected to be exactly 0 .. num_r-1
    for rel_id in range(num_r):
        if rel_id not in id2relation:
            raise ValueError('error when generaing id2relation')

    #initial relation id is always an even number
    ini_r_id_set = {rel_id for rel_id in range(num_r) if rel_id % 2 == 0}
    num_ini_r = len(ini_r_id_set)

    if num_ini_r != num_r // 2:
        raise ValueError('error when generating id2relation')

    def _pad(path):
        #fill the path with the space holder id num_r
        return list(path) + [num_r] * abs(len(path) - upper_bd)

    #not every entity in entity2id has to be in one_hop,
    #so the start entities are taken from one_hop itself
    start_ids = set()
    for entity in one_hop:
        start_ids.add(entity)
    start_ids = list(start_ids)
    random.shuffle(start_ids)

    slots = ('1', '2', '3')

    for count, s in enumerate(start_ids, start=1):

        #run the path finding algorithm starting from s
        result, _ = Class_2.obtain_paths('direct_neighbour', s, 'nb', lower_bd, upper_bd, one_hop)

        #two sampling rounds over all targets of s
        for _ in range(2):

            for t in result:

                relations = s_t_r[(s, t)]
                if len(relations) == 0:
                    raise ValueError(s,t,id2entity[s], id2entity[t])

                #only the forward (even id) relations are of interest
                ini_r_list = [r for r in relations if r % 2 == 0]

                #skip t unless it has at least three paths, at least one
                #initial relation, and at least one initial relation that
                #is absent (needed for the negative sample)
                if len(result[t]) < 3:
                    continue
                if not ini_r_list or len(ini_r_list) >= num_ini_r:
                    continue

                sampled_paths = random.sample(list(result[t]), 3)

                #####positive#####################
                for slot, path in zip(slots, sampled_paths):
                    x_p_list[slot].append(_pad(path))

                x_r_list.append([random.choice(ini_r_list)])
                y_list.append(1.)

                #####negative#####################
                #same paths again, each as a fresh list
                for slot, path in zip(slots, sampled_paths):
                    x_p_list[slot].append(_pad(path))

                neg_r_list = list(ini_r_id_set.difference(set(ini_r_list)))
                x_r_list.append([random.choice(neg_r_list)])
                y_list.append(0.)

        if count % 100 == 0:
            print('generating big-batches for path-based model', count, len(start_ids))

In [35]:
#Again, it is too slow to run the path-finding algorithm again and again on the complete graph (e.g. FB15K-237).
#Instead, we find the out-going paths (the "subgraph") of each entity once and store them,
#so that the subgraph-based training can reuse the stored subgraphs multiple times.
def store_subgraph_dicts(lower_bd, upper_bd, data, one_hop, s_t_r,
                         relation2id, entity2id, id2relation, id2entity):
    """Pre-compute and store a sample of out-going paths for every entity.

    For each key of ``one_hop`` the path finder ``Class_2.obtain_paths`` is
    run once (any target, path length bounded by ``lower_bd``/``upper_bd``),
    the paths to all reached targets are merged into one set, and at most
    100 of them are kept at random.

    Parameters
    ----------
    lower_bd, upper_bd : bounds forwarded unchanged to ``obtain_paths``.
    one_hop : mapping whose keys are the start entities; it is also
        forwarded to ``obtain_paths``.
    id2relation : mapping checked to contain every id in
        ``range(len(id2relation))``.
    data, s_t_r, relation2id, entity2id, id2entity : accepted for signature
        compatibility with the other batch builders; not read here.

    Returns
    -------
    dict mapping each start entity to a list of at most 100 sampled paths.

    Raises
    ------
    ValueError if the ids in ``id2relation`` are not contiguous from 0.
    """
    #sanity check: relation ids must be exactly 0 .. len(id2relation)-1
    for rel_id in range(len(id2relation)):
        if rel_id not in id2relation:
            raise ValueError('error when generaing id2relation')

    #only entities that actually appear in one_hop can start a path search
    source_set = set()
    for entity in one_hop:
        source_set.add(entity)

    sources = list(source_set)
    random.shuffle(sources)
    total = len(sources)

    #maps each start entity to its stored path sample
    stored = {}

    for done, source in enumerate(sources, start=1):

        found, lengths = Class_2.obtain_paths('any_target', source, 'any', lower_bd, upper_bd, one_hop)

        #merge the paths towards every reached target, dropping duplicates
        unique_paths = set()
        for target in found:
            for path in found[target]:
                unique_paths.add(path)

        #release the (possibly large) raw search result before the next search
        del found, lengths

        candidates = list(unique_paths)
        keep = min(len(candidates), 100)

        stored[source] = deepcopy(random.sample(candidates, keep))

        if done % 100 == 0:
            print('generating and storing paths for the path-based model', done, total)

    return stored

In [36]:
#functions to build the big-batch for the subgraph-based (one-hop neighbor) training
def _pad_path(path, pad_id, length):
    """Return ``path`` as a list, right-padded with ``pad_id`` up to ``length``."""
    #a negative repeat count yields an empty list, so a path that is already
    #``length`` long (or longer) is returned without any padding
    return list(path) + [pad_id] * (length - len(path))


def build_big_batches_subgraph(lower_bd, upper_bd, data, one_hop, s_t_r,
                      x_s_list, x_t_list, x_r_list, y_list, Dict,
                      relation2id, entity2id, id2relation, id2entity):
    """Fill the training lists of the subgraph-based model in place.

    Every triple ``(s, r, t)`` of ``data`` whose two entities each own at
    least three stored paths in ``Dict`` contributes three positive/negative
    pairs per pass (two shuffled passes over ``data`` are made):

    1. relation corruption: same paths, ``r`` replaced by another initial
       (even-numbered) relation id;
    2. source corruption: the source paths are replaced by paths of a random
       qualified entity;
    3. target corruption: the target paths are replaced by paths of a random
       qualified entity.

    Each sample consists of three source paths and three target paths, every
    path right-padded with the place-holder id ``len(id2relation)`` up to
    ``upper_bd`` entries.

    Parameters
    ----------
    upper_bd : padded path length.
    data : iterable of ``(s, r, t)`` triples; it is copied, never mutated.
    x_s_list, x_t_list : dicts with keys ``'1'``, ``'2'``, ``'3'`` mapping to
        lists; padded source / target paths are appended to them.
    x_r_list : list receiving ``[relation_id]`` per sample.
    y_list : list receiving the label (``1.`` or ``0.``) per sample.
    Dict : mapping entity -> collection of stored paths.
    id2relation : mapping whose keys must be ``0 .. len-1`` with an even
        number of entries (initial relations on the even ids).
    lower_bd, one_hop, s_t_r, relation2id, entity2id, id2entity : accepted
        for signature compatibility; not read here.

    Raises
    ------
    ValueError if the ids of ``id2relation`` are not contiguous from 0 or if
    the number of even ids is not half of the number of relations.

    Notes
    -----
    * The per-pass shuffle uses ``random.shuffle`` (the same generator as all
      other draws in this function) on a private copy of ``data``.
    * When only one initial relation exists no relation-corrupted negative
      can be drawn; the relation-corruption pair is then skipped instead of
      failing inside ``random.choice``.
    * A randomly drawn replacement entity is not checked against the true
      entity or against existing triples.
    """
    num_r = len(id2relation)

    #sanity check: relation ids must be exactly 0 .. num_r-1
    for i in range(num_r):
        if i not in id2relation:
            raise ValueError('error when generaing id2relation')

    #initial relation id is always an even number
    ini_r_id_set = set(range(0, num_r, 2))

    if len(ini_r_id_set) != int(num_r/2):
        raise ValueError('error when generating id2relation')

    #if an entity has at least three out-stretching paths, it is a qualified one
    #(set for O(1) membership tests, list for random.choice)
    qualified_set = {e for e in Dict if len(Dict[e]) >= 3}
    qualified = list(qualified_set)

    def emit(s_paths, t_paths, relation, label):
        #append one sample: three padded source paths, three padded target
        #paths, the relation id and the label
        for slot, s_path, t_path in zip(('1', '2', '3'), s_paths, t_paths):
            x_s_list[slot].append(_pad_path(s_path, num_r, upper_bd))
            x_t_list[slot].append(_pad_path(t_path, num_r, upper_bd))
        x_r_list.append([relation])
        y_list.append(label)

    #private copy so that the caller's collection is never reordered
    data = list(data)

    for iteration in range(2):

        random.shuffle(data)

        for i_0, triple in enumerate(data):

            s, r, t = triple[0], triple[1], triple[2] #obtain entities and relation IDs

            if s in qualified_set and t in qualified_set:

                #obtain the path list for true entities
                path_s, path_t = list(Dict[s]), list(Dict[t])

                #####relation corruption###########
                temp_s = random.sample(path_s, 3)
                temp_t = random.sample(path_t, 3)
                neg_r_list = list(ini_r_id_set.difference({r}))

                if neg_r_list: #empty only when a single initial relation exists
                    emit(temp_s, temp_t, r, 1.)
                    emit(temp_s, temp_t, random.choice(neg_r_list), 0.)

                #randomly choose two negative sampled entities
                s_ran = random.choice(qualified)
                t_ran = random.choice(qualified)

                #obtain the path list for random entities
                path_s_ran, path_t_ran = list(Dict[s_ran]), list(Dict[t_ran])

                #####source corruption###########
                temp_s = random.sample(path_s, 3)
                temp_t = random.sample(path_t, 3)
                emit(temp_s, temp_t, r, 1.)

                #the negative keeps the target paths of the positive above
                temp_s = random.sample(path_s_ran, 3)
                emit(temp_s, temp_t, r, 0.)

                #####target corruption###########
                temp_s = random.sample(path_s, 3)
                temp_t = random.sample(path_t, 3)
                emit(temp_s, temp_t, r, 1.)

                #the negative keeps the source paths of the positive above
                temp_t = random.sample(path_t_ran, 3)
                emit(temp_s, temp_t, r, 0.)

            if i_0 % 200 == 0:
                print('generating big-batches for subgraph-based model', i_0, len(data), iteration)

In [37]:
###fine tune the path-based model
#This cell builds one big-batch of path samples from the inductive split
#(data_ind / one_hop_ind / s_t_r_ind) and fits `model` on it.
lower_bd = lower_bound
upper_bd = upper_bound_path
batch_size = 32

#define the training lists: the three path inputs (keys '1'..'3'),
#the relation ids and the labels
train_p_list, train_r_list, train_y_list = {'1': [], '2': [], '3': []}, list(), list()

#######################################
###build the big-batches###############      

#fill in the training array list (the lists defined above are appended to in place)
build_big_batches_path(lower_bd, upper_bd, data_ind, one_hop_ind, s_t_r_ind,
                      train_p_list, train_r_list, train_y_list,
                      relation2id, entity2id, id2relation, id2entity)   

#######################################
###do the training#####################

#generate the input arrays: one integer array per path slot, one for the
#relation ids; the float labels 1./0. are cast to integers
x_train_1 = np.asarray(train_p_list['1'], dtype='int')
x_train_2 = np.asarray(train_p_list['2'], dtype='int')
x_train_3 = np.asarray(train_p_list['3'], dtype='int')
x_train_r = np.asarray(train_r_list, dtype='int')
y_train = np.asarray(train_y_list, dtype='int')

#the input order must match the input order of `model` (defined elsewhere in the notebook)
model.fit([x_train_1, x_train_2, x_train_3, x_train_r], y_train,
           batch_size=batch_size, epochs=5)

generating big-batches for path-based model 100 922
generating big-batches for path-based model 200 922
generating big-batches for path-based model 300 922
generating big-batches for path-based model 400 922
generating big-batches for path-based model 500 922
generating big-batches for path-based model 600 922
generating big-batches for path-based model 700 922
generating big-batches for path-based model 800 922
generating big-batches for path-based model 900 922
Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x7ff2daab26a0>

In [38]:
###fine tune the subgraph model
#This cell stores the out-going paths of every entity of the inductive split once,
#builds one big-batch of subgraph samples from them and fits `model_2` on it.
lower_bd = lower_bound
upper_bd = upper_bound_subg
batch_size = 32

#entity -> sampled out-going paths, computed once and reused by the batch builder
Dict_train_ind = store_subgraph_dicts(lower_bd, upper_bd, data_ind, one_hop_ind, s_t_r_ind,
                         relation2id, entity2id, id2relation, id2entity)

#define the training lists: three source-path inputs, three target-path inputs
#(keys '1'..'3'), the relation ids and the labels
train_s_list, train_t_list, train_r_list, train_y_list = {'1': [], '2': [], '3': []}, {'1': [], '2': [], '3': []}, list(), list()

#######################################
###build the big-batches###############      

#fill in the training array list (the lists defined above are appended to in place)
build_big_batches_subgraph(lower_bd, upper_bd, data_ind, one_hop_ind, s_t_r_ind,
                      train_s_list, train_t_list, train_r_list, train_y_list, Dict_train_ind,
                      relation2id, entity2id, id2relation, id2entity)

#######################################
###do the training#####################

#generate the input arrays for the source-side paths
x_train_s_1 = np.asarray(train_s_list['1'], dtype='int')
x_train_s_2 = np.asarray(train_s_list['2'], dtype='int')
x_train_s_3 = np.asarray(train_s_list['3'], dtype='int')

#generate the input arrays for the target-side paths
x_train_t_1 = np.asarray(train_t_list['1'], dtype='int')
x_train_t_2 = np.asarray(train_t_list['2'], dtype='int')
x_train_t_3 = np.asarray(train_t_list['3'], dtype='int')

#relation ids and labels (the float labels 1./0. are cast to integers);
#note that x_train_r / y_train overwrite the arrays of the path-based cell
x_train_r = np.asarray(train_r_list, dtype='int')
y_train = np.asarray(train_y_list, dtype='int')

#the input order must match the input order of `model_2` (defined elsewhere in the notebook)
model_2.fit([x_train_s_1, x_train_s_2, x_train_s_3, 
             x_train_t_1, x_train_t_2, x_train_t_3, x_train_r], y_train,
             batch_size=batch_size, epochs=5)

generating and storing paths for the path-based model 100 922
generating and storing paths for the path-based model 200 922
generating and storing paths for the path-based model 300 922
generating and storing paths for the path-based model 400 922
generating and storing paths for the path-based model 500 922
generating and storing paths for the path-based model 600 922
generating and storing paths for the path-based model 700 922
generating and storing paths for the path-based model 800 922
generating and storing paths for the path-based model 900 922
generating big-batches for subgraph-based model 0 1618 0
generating big-batches for subgraph-based model 200 1618 0
generating big-batches for subgraph-based model 400 1618 0
generating big-batches for subgraph-based model 600 1618 0
generating big-batches for subgraph-based model 800 1618 0
generating big-batches for subgraph-based model 1000 1618 0
generating big-batches for subgraph-based model 1200 1618 0
generating big-batches for su

<keras.callbacks.History at 0x7ff290652df0>

In [39]:
########################################################
#obtain the Hits@N for relation prediction##############
#For every test triple (s, r, t) all initial relations are scored for the pair (s, t);
#the filtered rank of the true relation r yields Hits@1/3/10 and the MRR.

#we select all the triples in the inductive test set
selected = list(data_ind_test)

###running counters for Hits@1, Hits@3, Hits@10 and the sum of reciprocal ranks
#(the candidates are all the other initial relations, not random samples)
Hits_at_1 = 0
Hits_at_3 = 0
Hits_at_10 = 0
MRR_raw = 0.

for i in range(len(selected)):
    
    s_true, r_true, t_true = selected[i][0], selected[i][1], selected[i][2]
    
    #run the path-based scoring
    score_dict_path = path_based_relation_scoring(s_true, t_true, lower_bound, upper_bound_path, one_hop_ind, id2relation, model)
    
    #run the one-hop neighbour based scoring
    score_dict_subg = subgraph_relation_scoring(s_true, t_true, lower_bound, upper_bound_subg, one_hop_ind, id2relation, model_2)
    
    #final score dict: the sum of the two models' scores per relation
    score_dict = defaultdict(float)
    
    for r in score_dict_path:
        score_dict[r] += score_dict_path[r]
    for r in score_dict_subg:
        score_dict[r] += score_dict_subg[r]
    
    #[... [score, r], ...]
    temp_list = list()
    
    for r in id2relation:
        
        #again, we only care about initial relation prediction
        #(initial relations have even ids)
        if r % 2 == 0:
        
            if r in score_dict:

                temp_list.append([score_dict[r], r])

            else:

                #a relation that neither model scored gets the score 0
                temp_list.append([0.0, r])
        
    #best score first; sorted() is stable, so ties keep the id2relation order
    sorted_list = sorted(temp_list, key = lambda x: x[0], reverse=True)
    
    #p: number of relations ranked above the true one (raw rank, zero-based)
    #exist_tri: how many of those form a triple that exists in some split
    p = 0
    exist_tri = 0
    
    while p < len(sorted_list) and sorted_list[p][1] != r_true:
        
        #moreover, we want to remove existing triples (filtered setting)
        if ((s_true, sorted_list[p][1], t_true) in data_test) or (
            (s_true, sorted_list[p][1], t_true) in data_valid) or (
            (s_true, sorted_list[p][1], t_true) in data) or (
            (s_true, sorted_list[p][1], t_true) in data_ind) or (
            (s_true, sorted_list[p][1], t_true) in data_ind_valid) or (
            (s_true, sorted_list[p][1], t_true) in data_ind_test):
            
            exist_tri += 1
            
        p += 1
    
    #p - exist_tri is the zero-based filtered rank of the true relation
    if p - exist_tri == 0:
        
        Hits_at_1 += 1
        
    if p - exist_tri < 3:
        
        Hits_at_3 += 1
        
    if p - exist_tri < 10:
        
        Hits_at_10 += 1
        
    MRR_raw += 1./float(p - exist_tri + 1.) 
        
    #NOTE(review): if r_true is not among the ranked relations (e.g. an odd id),
    #the loop above ends with p == len(sorted_list) and sorted_list[p] below
    #raises an IndexError - confirm that test triples only use initial relations
    print('checkcorrect', r_true, sorted_list[p][1],
          'real score', sorted_list[p][0],
          'Hits@1', Hits_at_1/(i+1),
          'Hits@3', Hits_at_3/(i+1),
          'Hits@10', Hits_at_10/(i+1),
          'MRR', MRR_raw/(i+1),
          'cur_rank', p - exist_tri,
          'abs_cur_rank', p,
          'total_num', i, len(selected))

0 0
9 10 10
checkcorrect 0 0 real score 0.3973050579428673 Hits@1 0.0 Hits@3 1.0 Hits@10 1.0 MRR 0.5 cur_rank 1 abs_cur_rank 1 total_num 0 188
0 1
9 11 31
checkcorrect 0 0 real score 0.7147584974765777 Hits@1 0.5 Hits@3 1.0 Hits@10 1.0 MRR 0.75 cur_rank 0 abs_cur_rank 0 total_num 1 188
0 1
9 34 10
checkcorrect 0 0 real score 0.7171109318733215 Hits@1 0.6666666666666666 Hits@3 1.0 Hits@10 1.0 MRR 0.8333333333333334 cur_rank 0 abs_cur_rank 0 total_num 2 188
9 61
9 13 22
checkcorrect 0 0 real score 1.6940622508525849 Hits@1 0.75 Hits@3 1.0 Hits@10 1.0 MRR 0.875 cur_rank 0 abs_cur_rank 0 total_num 3 188
9 11
9 47 31
checkcorrect 0 0 real score 1.4414083257317545 Hits@1 0.8 Hits@3 1.0 Hits@10 1.0 MRR 0.9 cur_rank 0 abs_cur_rank 0 total_num 4 188
0 2
9 14 58
checkcorrect 8 8 real score 0.2447445958852768 Hits@1 0.6666666666666666 Hits@3 1.0 Hits@10 1.0 MRR 0.8333333333333334 cur_rank 1 abs_cur_rank 1 total_num 5 188
9 30
9 22 9
checkcorrect 8 8 real score 0.5786192186176777 Hits@1 0.57142857

9 38
9 18 8
checkcorrect 0 0 real score 1.2535340383648874 Hits@1 0.7843137254901961 Hits@3 0.9803921568627451 Hits@10 1.0 MRR 0.8830065359477124 cur_rank 0 abs_cur_rank 0 total_num 50 188
9 54
9 23 20
checkcorrect 0 0 real score 1.7500863373279572 Hits@1 0.7884615384615384 Hits@3 0.9807692307692307 Hits@10 1.0 MRR 0.8852564102564102 cur_rank 0 abs_cur_rank 0 total_num 51 188
9 5
9 71 28
checkcorrect 0 0 real score 1.5344414383172988 Hits@1 0.7924528301886793 Hits@3 0.9811320754716981 Hits@10 1.0 MRR 0.8874213836477987 cur_rank 0 abs_cur_rank 0 total_num 52 188
9 53
9 35 13
checkcorrect 0 0 real score 1.4713187865912913 Hits@1 0.7962962962962963 Hits@3 0.9814814814814815 Hits@10 1.0 MRR 0.8895061728395062 cur_rank 0 abs_cur_rank 0 total_num 53 188
9 4
9 23 22
checkcorrect 10 10 real score 1.7709940746426582 Hits@1 0.8 Hits@3 0.9818181818181818 Hits@10 1.0 MRR 0.8915151515151515 cur_rank 0 abs_cur_rank 0 total_num 54 188
9 86
9 32 19
checkcorrect 0 0 real score 1.4118973553180694 Hits@1

9 150
9 12 13
checkcorrect 0 0 real score 1.5491866767406464 Hits@1 0.8526315789473684 Hits@3 0.9789473684210527 Hits@10 1.0 MRR 0.9164912280701754 cur_rank 0 abs_cur_rank 0 total_num 94 188
9 7
9 26 9
checkcorrect 0 0 real score 1.6799812585115432 Hits@1 0.8541666666666666 Hits@3 0.9791666666666666 Hits@10 1.0 MRR 0.9173611111111111 cur_rank 0 abs_cur_rank 0 total_num 95 188
0 0
0 28 1
checkcorrect 0 0 real score 0.0 Hits@1 0.8556701030927835 Hits@3 0.979381443298969 Hits@10 1.0 MRR 0.9182130584192439 cur_rank 0 abs_cur_rank 0 total_num 96 188
0 0
9 32 5
checkcorrect 8 8 real score 0.2664215698838234 Hits@1 0.8469387755102041 Hits@3 0.9795918367346939 Hits@10 1.0 MRR 0.9139455782312925 cur_rank 1 abs_cur_rank 1 total_num 97 188
9 65
9 27 34
checkcorrect 0 0 real score 1.5753029331564905 Hits@1 0.8484848484848485 Hits@3 0.9797979797979798 Hits@10 1.0 MRR 0.9148148148148147 cur_rank 0 abs_cur_rank 0 total_num 98 188
0 1
9 15 46
checkcorrect 0 0 real score 0.6504505783319473 Hits@1 0.85 

9 45
9 28 37
checkcorrect 0 0 real score 1.6402869790792465 Hits@1 0.8561151079136691 Hits@3 0.9784172661870504 Hits@10 1.0 MRR 0.9189448441247002 cur_rank 0 abs_cur_rank 0 total_num 138 188
0 1
9 45 22
checkcorrect 0 0 real score 0.6212806850671768 Hits@1 0.8571428571428571 Hits@3 0.9785714285714285 Hits@10 1.0 MRR 0.9195238095238096 cur_rank 0 abs_cur_rank 0 total_num 139 188
9 93
9 31 26
checkcorrect 0 0 real score 1.6512427419424056 Hits@1 0.8581560283687943 Hits@3 0.9787234042553191 Hits@10 1.0 MRR 0.9200945626477542 cur_rank 0 abs_cur_rank 0 total_num 140 188
9 87
9 39 44
checkcorrect 0 0 real score 1.5928798407316207 Hits@1 0.8591549295774648 Hits@3 0.9788732394366197 Hits@10 1.0 MRR 0.9206572769953053 cur_rank 0 abs_cur_rank 0 total_num 141 188
0 2
9 14 9
checkcorrect 0 0 real score 0.46829487979412077 Hits@1 0.8531468531468531 Hits@3 0.9790209790209791 Hits@10 1.0 MRR 0.9177156177156178 cur_rank 1 abs_cur_rank 1 total_num 142 188
9 132
9 58 37
checkcorrect 0 0 real score 1.661

9 16 34
checkcorrect 0 0 real score 0.6142259627580643 Hits@1 0.8524590163934426 Hits@3 0.9726775956284153 Hits@10 1.0 MRR 0.9163023679417123 cur_rank 0 abs_cur_rank 0 total_num 182 188
9 4
9 22 19
checkcorrect 10 10 real score 1.9311034083366394 Hits@1 0.8532608695652174 Hits@3 0.9728260869565217 Hits@10 1.0 MRR 0.9167572463768117 cur_rank 0 abs_cur_rank 0 total_num 183 188
9 22
9 50 58
checkcorrect 0 0 real score 1.5718801110982894 Hits@1 0.8540540540540541 Hits@3 0.972972972972973 Hits@10 1.0 MRR 0.9172072072072073 cur_rank 0 abs_cur_rank 0 total_num 184 188
9 18
9 16 10
checkcorrect 0 0 real score 1.3778696570545435 Hits@1 0.8548387096774194 Hits@3 0.9731182795698925 Hits@10 1.0 MRR 0.9176523297491039 cur_rank 0 abs_cur_rank 0 total_num 185 188
9 14
9 12 33
checkcorrect 0 0 real score 1.2510857969522475 Hits@1 0.8556149732620321 Hits@3 0.9732620320855615 Hits@10 1.0 MRR 0.9180926916221034 cur_rank 0 abs_cur_rank 0 total_num 186 188
0 0
0 1 51
checkcorrect 0 0 real score 0.0 Hits@1 

In [40]:
###########################################
##obtain the AUC-PR for the test triples###
#Every test triple is paired with one corrupted triple (head or tail replaced by a
#random entity); all triples are scored with the subgraph model and the area under
#the precision-recall curve is reported.
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import average_precision_score, precision_recall_curve
#NOTE(review): plot_precision_recall_curve is not used in this cell and appears to have
#been removed from recent scikit-learn releases - confirm against the installed version
from sklearn.metrics import auc, plot_precision_recall_curve
import matplotlib.pyplot as plt

#we select all the triples in the inductive test set
pos_triples = list(data_ind_test)

#all the splits in which a corrupted triple must not already exist
#(same order as the membership checks were written originally)
known_triple_splits = (data_test, data_valid, data, data_ind, data_ind_valid, data_ind_test)

#candidate entities for the corruption, materialised once instead of on every draw;
#new_ent_set is not modified in this cell, so every draw sees the same ordering
new_ent_list = list(new_ent_set)

#we build the negative samples by randomly replacing the head or tail entity in the triple.
neg_triples = list()

for i in range(len(pos_triples)):
    
    s_pos, r_pos, t_pos = pos_triples[i][0], pos_triples[i][1], pos_triples[i][2]
    
    #decide to replace the head or tail entity
    number_0 = random.uniform(0, 1)
    
    if number_0 < 0.5: #replace head entity
        
        s_neg = random.choice(new_ent_list)
        
        #filter out the existing triples: redraw until the triple is unknown
        while any((s_neg, r_pos, t_pos) in split for split in known_triple_splits):
            
            s_neg = random.choice(new_ent_list)
        
        neg_triples.append((s_neg, r_pos, t_pos))
    
    else: #replace tail entity

        t_neg = random.choice(new_ent_list)
        
        #filter out the existing triples: redraw until the triple is unknown
        while any((s_pos, r_pos, t_neg) in split for split in known_triple_splits):
            
            t_neg = random.choice(new_ent_list)
        
        neg_triples.append((s_pos, r_pos, t_neg))

if len(pos_triples) != len(neg_triples):
    raise ValueError('error when generating negative triples')
        
#combine all triples
all_triples = pos_triples + neg_triples

#obtain the label array: 1 for the true triples, 0 for the corrupted ones
arr1 = np.ones((len(pos_triples),))
arr2 = np.zeros((len(neg_triples),))
y_test = np.concatenate((arr1, arr2))

#shuffle positive and negative triples (optional); both are permuted consistently
all_triples, y_test = shuffle(all_triples, y_test)

#obtain the score array
y_score = np.zeros((len(y_test),))

#implement the scoring
for i in range(len(all_triples)):
    
    s, r, t = all_triples[i][0], all_triples[i][1], all_triples[i][2]
    
    #path_score = path_based_triple_scoring(s, r, t, lower_bound, upper_bound_path, one_hop_ind, id2relation, model)
    
    subg_score = subgraph_triple_scoring(s, r, t, lower_bound, upper_bound_subg, one_hop_ind, id2relation, model_2)
    
    #ave_score = (path_score + subg_score)/float(2)
    
    #only the subgraph score is used; the averaged variant is kept above for reference
    #y_score[i] = ave_score
    y_score[i] = subg_score
    
    if i % 20 == 0 and i > 0:
        print('evaluating scores', i, len(all_triples))
        
        #running AUC-PR over the triples scored before this one (indices 0 .. i-1)
        # Data to plot precision - recall curve
        precision, recall, thresholds = precision_recall_curve(y_test[:i], y_score[:i])
        # Use AUC function to calculate the area under the curve of precision recall curve
        auc_precision_recall = auc(recall, precision)
        print('AUC-PR is:', auc_precision_recall)
        
        
#final AUC-PR over all the triples
# Data to plot precision - recall curve
precision, recall, thresholds = precision_recall_curve(y_test, y_score)
# Use AUC function to calculate the area under the curve of precision recall curve
auc_precision_recall = auc(recall, precision)
print('AUC-PR is:', auc_precision_recall)

evaluating scores 20 376
AUC-PR is: 0.8158124039702987
evaluating scores 40 376
AUC-PR is: 0.770603450876653
evaluating scores 60 376
AUC-PR is: 0.7587513046465775
evaluating scores 80 376
AUC-PR is: 0.7889110246028787
evaluating scores 100 376
AUC-PR is: 0.77768457943568
evaluating scores 120 376
AUC-PR is: 0.7778079025696366
evaluating scores 140 376
AUC-PR is: 0.7901361514723642
evaluating scores 160 376
AUC-PR is: 0.7539759441124845
evaluating scores 180 376
AUC-PR is: 0.7480333614245009
evaluating scores 200 376
AUC-PR is: 0.7373484679242981
evaluating scores 220 376
AUC-PR is: 0.7403965974871085
evaluating scores 240 376
AUC-PR is: 0.7600854965707611
evaluating scores 260 376
AUC-PR is: 0.7360974188142251
evaluating scores 280 376
AUC-PR is: 0.7377639545950183
evaluating scores 300 376
AUC-PR is: 0.725380399187881
evaluating scores 320 376
AUC-PR is: 0.7198041945682027
evaluating scores 340 376
AUC-PR is: 0.7173594115404267
evaluating scores 360 376
AUC-PR is: 0.7250464293392811


In [None]:
######################################################
#obtain the Hits@N for entity prediction##############
#
#For every triple of the inductive test set, the true triple is scored
#together with a fixed number of corrupted triples (head or tail entity
#replaced by a random entity drawn from new_ent_set). The true triple is then
#ranked by score and Hits@1, Hits@3, Hits@10 and the MRR are accumulated.
#All scores come from subgraph_triple_scoring with model_2.

#we select all the triples in the inductive test set
selected = list(data_ind_test)

#number of corrupted triples ranked against each true triple
num_negatives = 50

#random.choice needs a sequence; build the list once instead of rebuilding it
#from the set for every single draw (it does not change inside the loop)
candidate_entities = list(new_ent_set)

#a corrupted triple is rejected when it appears in any of these collections
known_triple_sets = (data_test, data_valid, data,
                     data_ind, data_ind_valid, data_ind_test)

###running counters for Hits@1, Hits@3, Hits@10 and MRR###
Hits_at_1 = 0
Hits_at_3 = 0
Hits_at_10 = 0
MRR_raw = 0.

for i, pos_triple in enumerate(selected):

    s_pos, r_pos, t_pos = pos_triple[0], pos_triple[1], pos_triple[2]

    #score the true triple; it is the first entry of the candidate list
    subg_score = subgraph_triple_scoring(s_pos, r_pos, t_pos, lower_bound, upper_bound_subg, one_hop_ind, id2relation, model_2)

    triple_list = [[(s_pos, r_pos, t_pos), subg_score]]

    #generate the random corrupted triples
    for sub_i in range(num_negatives):

        #decide to replace the head (< 0.5) or the tail entity
        replace_head = random.uniform(0, 1) < 0.5

        #rejection sampling: redraw until the corrupted triple is not a
        #known triple. data_ind_test is part of the filter, so the true
        #triple itself can never be drawn as a negative.
        while True:

            ent_neg = random.choice(candidate_entities)

            if replace_head:
                neg_triple = (ent_neg, r_pos, t_pos)
            else:
                neg_triple = (s_pos, r_pos, ent_neg)

            if not any(neg_triple in known for known in known_triple_sets):
                break

        subg_score = subgraph_triple_scoring(neg_triple[0], neg_triple[1], neg_triple[2], lower_bound, upper_bound_subg, one_hop_ind, id2relation, model_2)

        triple_list.append([neg_triple, subg_score])

    #random shuffle before the (stable) sort, so that triples with equal
    #scores end up in random order instead of favouring the true triple,
    #which was inserted first
    random.shuffle(triple_list)

    #sort by score, best first
    sorted_list = sorted(triple_list, key=lambda x: x[-1], reverse=True)

    #0-based rank of the true triple in the sorted candidates
    p = next(idx for idx, entry in enumerate(sorted_list)
             if entry[0] == (s_pos, r_pos, t_pos))

    if p == 0:
        Hits_at_1 += 1

    if p < 3:
        Hits_at_3 += 1

    if p < 10:
        Hits_at_10 += 1

    #reciprocal of the 1-based rank
    MRR_raw += 1. / float(p + 1.)

    #running averages over the i + 1 triples evaluated so far
    print('checkcorrect', (s_pos, r_pos, t_pos), sorted_list[p][0],
          'real score', sorted_list[p][-1],
          'Hits@1', Hits_at_1/(i+1),
          'Hits@3', Hits_at_3/(i+1),
          'Hits@10', Hits_at_10/(i+1),
          'MRR', MRR_raw/(i+1),
          'rank', p,
          'total_num', i, len(selected))

checkcorrect (3616, 0, 3274) (3616, 0, 3274) real score 0.4064545691013336 Hits@1 0.0 Hits@3 0.0 Hits@10 0.0 MRR 0.03571428571428571 rank 27 total_num 0 188
checkcorrect (3613, 0, 3144) (3613, 0, 3144) real score 0.7161331176757812 Hits@1 0.0 Hits@3 0.5 Hits@10 0.5 MRR 0.26785714285714285 rank 1 total_num 1 188
checkcorrect (2799, 0, 3251) (2799, 0, 3251) real score 0.7103965640068054 Hits@1 0.0 Hits@3 0.3333333333333333 Hits@10 0.6666666666666666 MRR 0.2261904761904762 rank 6 total_num 2 188
checkcorrect (3170, 0, 2776) (3170, 0, 2776) real score 0.7012916684150696 Hits@1 0.0 Hits@3 0.25 Hits@10 0.75 MRR 0.20535714285714285 rank 6 total_num 3 188
checkcorrect (3074, 0, 3194) (3074, 0, 3194) real score 0.749379575252533 Hits@1 0.0 Hits@3 0.4 Hits@10 0.8 MRR 0.23095238095238094 rank 2 total_num 4 188
checkcorrect (3329, 8, 2803) (3329, 8, 2803) real score 0.287155294418335 Hits@1 0.0 Hits@3 0.3333333333333333 Hits@10 0.6666666666666666 MRR 0.19912698412698412 rank 24 total_num 5 188
che

checkcorrect (2940, 0, 3343) (2940, 0, 3343) real score 0.7114482671022415 Hits@1 0.0 Hits@3 0.3404255319148936 Hits@10 0.48936170212765956 MRR 0.1805547654622057 rank 3 total_num 46 188
checkcorrect (3339, 0, 3170) (3339, 0, 3170) real score 0.5601408511400223 Hits@1 0.0 Hits@3 0.3333333333333333 Hits@10 0.4791666666666667 MRR 0.17839577195097386 rank 12 total_num 47 188
checkcorrect (3212, 8, 3283) (3212, 8, 3283) real score 0.5529832601547241 Hits@1 0.0 Hits@3 0.32653061224489793 Hits@10 0.46938775510204084 MRR 0.1764557221832669 rank 11 total_num 48 188
checkcorrect (3377, 8, 3576) (3377, 8, 3576) real score 0.0 Hits@1 0.0 Hits@3 0.32 Hits@10 0.46 MRR 0.17345292352907526 rank 37 total_num 49 188
checkcorrect (3154, 0, 3597) (3154, 0, 3597) real score 0.25739180445671084 Hits@1 0.0 Hits@3 0.3137254901960784 Hits@10 0.45098039215686275 MRR 0.17058182751923118 rank 36 total_num 50 188
checkcorrect (3150, 0, 3219) (3150, 0, 3219) real score 0.726323926448822 Hits@1 0.0 Hits@3 0.3076923

checkcorrect (3572, 0, 2813) (3572, 0, 2813) real score 0.5788687020540237 Hits@1 0.02247191011235955 Hits@3 0.24719101123595505 Hits@10 0.4044943820224719 MRR 0.17062255277834687 rank 23 total_num 88 188
checkcorrect (3540, 0, 2833) (3540, 0, 2833) real score 0.6214325815439224 Hits@1 0.022222222222222223 Hits@3 0.24444444444444444 Hits@10 0.4111111111111111 MRR 0.17150452441414302 rank 3 total_num 89 188
checkcorrect (2859, 0, 3040) (2859, 0, 3040) real score 0.6616534888744354 Hits@1 0.02197802197802198 Hits@3 0.24175824175824176 Hits@10 0.4175824175824176 MRR 0.170718760409592 rank 9 total_num 90 188
checkcorrect (2956, 0, 2903) (2956, 0, 2903) real score 0.7293300211429596 Hits@1 0.021739130434782608 Hits@3 0.25 Hits@10 0.42391304347826086 MRR 0.17248631011528484 rank 2 total_num 91 188
checkcorrect (3388, 0, 2809) (3388, 0, 2809) real score 0.7448042243719101 Hits@1 0.03225806451612903 Hits@3 0.25806451612903225 Hits@10 0.43010752688172044 MRR 0.18138430678071185 rank 0 total_num