## Import packages

In [1]:
import pandas as pd
import numpy as np
from operator import itemgetter
from CFModel import CFModel

Using Theano backend.


## Define constants

In [2]:
# Input files; each is read as tab-separated text by the loading cell.
TEST_CSV_FILE = 'fb15k_test.csv'
CVSC_ENTITIES_CSV_FILE = 'fb15k_cvsc_entities.csv'
CVSC_PAIRS_CSV_FILE = 'fb15k_cvsc_pairs.csv'
# Weights file loaded into the CFModel instance via load_weights().
MODEL_WEIGHTS_FILE = 'fb15k_cvsc_weights.h5'
# Third constructor argument of CFModel (presumably the latent factor
# dimensionality; it has to match the trained weights -- TODO confirm).
K_FACTORS = 50

## Load FB15K-237 data

In [5]:
# Test triples; column names are assigned explicitly through `names`.
triples = pd.read_csv(TEST_CSV_FILE, sep='\t', names=['subj', 'rel', 'obj', 'pid', 'rid'])
# Entity ids as a NumPy array. The [1:] slice drops the first row
# (presumably a header line that is read as data because `names` is given -- TODO confirm).
entities = pd.read_csv(CVSC_ENTITIES_CSV_FILE, sep='\t', names=['entity'])['entity'].values[1:]
# (subj, obj) pairs together with the pair id (`pid`) assigned to each.
# NOTE(review): unlike `entities`, `triples` and `entity_pairs` keep their
# first row; if those files also start with a header line it is being loaded
# as a data row -- confirm against the files.
entity_pairs = pd.read_csv(CVSC_PAIRS_CSV_FILE, sep='\t', names=['subj', 'obj', 'pid'])

  interactivity=interactivity, compiler=compiler, result=result)


In [7]:
# Display the first entity id as a quick sanity check of the load.
entities[0]

'/m/025sf8g'

## Load model weights into evaluation model

In [None]:
# Build the evaluation model and restore its weights from MODEL_WEIGHTS_FILE.
# NOTE(review): `n_pairs` and `m_relations` are not defined in any cell
# visible in this notebook; they must be set before this cell runs --
# confirm where they come from and that they match the saved weights.
model = CFModel(n_pairs, m_relations, K_FACTORS)
model.load_weights(MODEL_WEIGHTS_FILE)

## Execute evaluation protocol

From [[2]](https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/main_cvsc2015.pdf):

> Given a set of triples in a set disjoint from a training
> knowledge graph, we test models on predicting the subject or
> object of each triple, given the relation type and the other
> argument. We rank all entities in the training knowledge base in
> order of their likelihood of filling the argument position. We
> report the mean reciprocal rank of the correct entity, as well as
> HITS@10 – the percent of test triples for which the correct
> argument was ranked in the top ten. We use filtered measures
> following the protocol proposed in Bordes et al. (2013) – that
> is, when we rank entities for a given position, we remove all
> other entities that are known to be part of an existing triple in
> the training, development, or test set. This avoids penalizing
> the model for ranking other correct fillers higher than the
> tested argument. We thus report filtered mean reciprocal
> rank (labeled MRR in the Figures), and filtered HITS@10. In the
> figures we present MRR values scaled by 100, so that the maximum
> possible MRR is 100.

In [None]:
def sp_query_reciprocal_rank(model, subj, rid, obj, entities):
    """Return the reciprocal rank of ``obj`` for the query (subj, rid, ?).

    The ranking comes from ``sp_query_results``; the first element of each
    ranked result is taken as the candidate and the position of ``obj``
    among those candidates is converted by ``reciprocal_rank``.

    Args:
        model: Scoring model, passed through to ``sp_query_results``.
        subj: Subject entity of the test triple.
        rid: Relation id of the test triple.
        obj: Correct object entity whose rank is wanted.
        entities: Candidate entities to rank.

    Returns:
        Whatever ``reciprocal_rank`` returns for ``obj`` in the ranking.
    """
    # The ranking helper defined in this notebook is `sp_query_results`;
    # there is no `sp_query_scores`.
    ranked = sp_query_results(model, subj, rid, entities)
    objs = [result[0] for result in ranked]
    return reciprocal_rank(obj, objs)

def sp_query_hits_at_10(model, subj, rid, obj, entities):
    """Return 1.0 if ``obj`` is among the ten best candidates, else 0.0.

    The ranking for the query (subj, rid, ?) comes from
    ``sp_query_results``; the first element of each ranked result is taken
    as the candidate.

    Args:
        model: Scoring model, passed through to ``sp_query_results``.
        subj: Subject entity of the test triple.
        rid: Relation id of the test triple.
        obj: Correct object entity.
        entities: Candidate entities to rank.

    Returns:
        ``1.0`` when ``obj`` appears in the first ten candidates, ``0.0``
        otherwise.
    """
    # The ranking helper defined in this notebook is `sp_query_results`;
    # there is no `sp_query_scores`.
    ranked = sp_query_results(model, subj, rid, entities)
    top_ten = [result[0] for result in ranked[:10]]
    return 1.0 if obj in top_ten else 0.0

def sp_query_results(model, subj, rid, entities):
    """Rank every candidate object for the query (subj, rid, ?).

    Args:
        model: Object exposing ``rank(pid, rid)``, which yields a score.
        subj: Fixed subject entity of the query.
        rid: Relation id of the query.
        entities: Candidate object entities.

    Returns:
        A list of ``[candidate_obj, score]`` entries sorted by score in
        descending order.
    """
    pids = sp_query_pairs(subj, entities)
    # Label each score with the candidate object it belongs to. Labelling
    # with the query subject would make every entry identical in position 0,
    # so the correct object could never be located in the ranking.
    scored = [[candidate, model.rank(pid, rid)]
              for candidate, pid in zip(entities, pids)]
    return sorted(scored, reverse=True, key=itemgetter(1))

def sp_query_pairs(subj, entities):
    """Return the pair id of (subj, candidate) for every candidate object.

    The ids are produced by ``pair_id`` and keep the order of ``entities``.
    """
    pair_ids = [pair_id(subj, candidate_obj) for candidate_obj in entities]
    return pair_ids

def po_query_reciprocal_rank(model, subj, rid, obj, entities):
    """Return the reciprocal rank of ``subj`` for the query (?, rid, obj).

    The ranking comes from ``po_query_results``; the first element of each
    ranked result is taken as the candidate and the position of ``subj``
    among those candidates is converted by ``reciprocal_rank``.

    Args:
        model: Scoring model, passed through to ``po_query_results``.
        subj: Correct subject entity whose rank is wanted.
        rid: Relation id of the test triple.
        obj: Object entity of the test triple.
        entities: Candidate entities to rank.

    Returns:
        Whatever ``reciprocal_rank`` returns for ``subj`` in the ranking.
    """
    # The ranking helper defined in this notebook is `po_query_results`;
    # there is no `po_query_scores`.
    ranked = po_query_results(model, obj, rid, entities)
    subjs = [result[0] for result in ranked]
    return reciprocal_rank(subj, subjs)

def po_query_hits_at_10(model, subj, rid, obj, entities):
    """Return 1.0 if ``subj`` is among the ten best candidates, else 0.0.

    The ranking for the query (?, rid, obj) comes from
    ``po_query_results``; the first element of each ranked result is taken
    as the candidate.

    Args:
        model: Scoring model, passed through to ``po_query_results``.
        subj: Correct subject entity.
        rid: Relation id of the test triple.
        obj: Object entity of the test triple.
        entities: Candidate entities to rank.

    Returns:
        ``1.0`` when ``subj`` appears in the first ten candidates, ``0.0``
        otherwise.
    """
    # The ranking helper defined in this notebook is `po_query_results`;
    # there is no `po_query_scores`.
    ranked = po_query_results(model, obj, rid, entities)
    top_ten = [result[0] for result in ranked[:10]]
    return 1.0 if subj in top_ten else 0.0

def po_query_results(model, obj, rid, entities):
    """Rank every candidate subject for the query (?, rid, obj).

    Args:
        model: Object exposing ``rank(pid, rid)``, which yields a score.
        obj: Fixed object entity of the query.
        rid: Relation id of the query.
        entities: Candidate subject entities.

    Returns:
        A list of ``[candidate_subj, score]`` entries sorted by score in
        descending order.
    """
    # Use the object-anchored pair builder; `subj` is not a name in this
    # scope, so the subject-anchored helper cannot be called here.
    pids = po_query_pairs(obj, entities)
    # Label each score with the candidate subject it belongs to so the
    # correct subject can be located in the ranking.
    scored = [[candidate, model.rank(pid, rid)]
              for candidate, pid in zip(entities, pids)]
    return sorted(scored, reverse=True, key=itemgetter(1))

def po_query_pairs(obj, entities):
    """Return the pair id of (candidate, obj) for every candidate subject.

    The ids are produced by ``pair_id`` and keep the order of ``entities``.
    """
    pair_ids = [pair_id(candidate_subj, obj) for candidate_subj in entities]
    return pair_ids

def pair_id(subj, obj):
    """Return the ``pid`` of the (subj, obj) row in ``entity_pairs``.

    Args:
        subj: Subject entity to match against the ``subj`` column.
        obj: Object entity to match against the ``obj`` column.

    Returns:
        The ``pid`` value of the first matching row.

    Raises:
        IndexError: If no row of ``entity_pairs`` matches the pair.
    """
    # Both conditions are comparisons; the parentheses are required because
    # `&` binds tighter than `==`.
    is_match = (entity_pairs['subj'] == subj) & (entity_pairs['obj'] == obj)
    pair = entity_pairs[is_match]
    return pair['pid'].values[0]

def reciprocal_rank(correct_response, responses):
    """Return 1 / rank of ``correct_response`` within ``responses``.

    Ranks are 1-based: the first response has rank 1 and therefore a
    reciprocal rank of 1.0.

    Args:
        correct_response: The value to look for.
        responses: Ordered iterable of responses, best first. Plain lists
            and NumPy arrays both work because the elements are compared
            one by one.

    Returns:
        ``1.0 / rank`` of the first matching response, or ``0.0`` when
        ``correct_response`` does not occur (including empty input).
    """
    # Element-wise scan instead of `np.where(list == value)`: comparing a
    # Python list with `==` gives a single bool, not a per-element mask.
    for rank, response in enumerate(responses, 1):
        if response == correct_response:
            return 1.0 / rank
    return 0.0

def _per_triple(metric):
    """Apply ``metric`` to each row of ``triples`` and return the results."""
    # The metric functions take the fields of a single triple, so they are
    # called row by row rather than with whole columns.
    return triples.apply(
        lambda row: metric(model, row['subj'], row['rid'], row['obj'], entities),
        axis=1)


triples['sp_reciprocal_rank'] = _per_triple(sp_query_reciprocal_rank)
triples['po_reciprocal_rank'] = _per_triple(po_query_reciprocal_rank)
triples['sp_hits_at_10'] = _per_triple(sp_query_hits_at_10)
triples['po_hits_at_10'] = _per_triple(po_query_hits_at_10)

# Each test triple contributes two queries: one predicting the object and
# one predicting the subject.
n_queries = float(len(triples)) * 2.0

mrr = (triples['sp_reciprocal_rank'].sum() + triples['po_reciprocal_rank'].sum()) / n_queries
hits_at_10 = (triples['sp_hits_at_10'].sum() + triples['po_hits_at_10'].sum()) / n_queries

# %-formatting produces the same output under both Python 2 and Python 3.
print('Mean reciprocal rank: %s' % mrr)
print('HITS@10: %s' % hits_at_10)

## References

[[1]](https://www.microsoft.com/en-us/download/details.aspx?id=52312) K. Toutanova, "FB15K-237 Knowledge Base Completion Dataset," Web page https://www.microsoft.com/en-us/download/details.aspx?id=52312, May 2016. Last accessed 2016-08-14.

[[2]](https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/main_cvsc2015.pdf) K. Toutanova and D. Chen, “Observed versus latent features for knowledge base and text inference,” in 3rd Workshop on Continuous Vector Space Models and Their Compositionality, Jul. 2015.