# Exploratory analysis

## Looking at our data

In [33]:
import torch
from node2vec import Node2Vec
from torch_geometric.data import Data
from utils.gsn_argparse import str2bool, str2actication
import torch_geometric.utils as gutils
from torch_geometric.nn import GAE
import trainer
import utils.gsn_argparse as gap
import numpy as np
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder

## Label encoding

In [34]:
# Read data and make dataframes
name = ['entity', 'id']
entity_id = pd.read_csv('./data/FB15k/entities.txt', sep='\t', header=None, names=name, engine='python')
all_entities = entity_id['entity'].values

name = ['relation', 'id']
relation_id = pd.read_csv('./data/FB15k/relations.txt', sep='\t', header=None, names=name, engine='python')
all_relations = relation_id['relation'].values

# Read RDF Triples
name = ['subject', 'object', 'relation']
data = pd.read_csv('./data/FB15k/valid.txt', sep='\t', header=None, names=name, engine='python')

SUBSAMPLE = 100

subjects = data['subject'].values
objects = data['object'].values
relations = data['relation'].values

In [35]:
# fit entity encoder
le_entity = LabelEncoder()
le_entity.fit(all_entities)

# fit relationship encoder
le_relation = LabelEncoder()
le_relation.fit(all_relations)

# string list to int array using LabelEncoder on complete data set
subjects = le_entity.transform(subjects)
objects = le_entity.transform(objects)
relations = le_relation.transform(relations)

# encode subsample (change range to 0-N)
le_entity2 = LabelEncoder().fit(np.append(subjects,objects))
le_relation2 = LabelEncoder().fit(relations)


subjects = le_entity2.transform(subjects)
objects = le_entity2.transform(objects)
relations = le_relation2.transform(relations)


edge_attributes = torch.tensor(relations, dtype=torch.float)
edge_index = torch.tensor([subjects, objects], dtype=torch.long)
unique_entities = torch.tensor(np.unique(edge_index.reshape(edge_index.shape[-1]*2, 1)), dtype=torch.float)
dataset = Data(x=unique_entities, edge_attr=edge_attributes, edge_index=edge_index)

In [36]:
from gensim.models import KeyedVectors
embedded_nodes =  KeyedVectors.load_word2vec_format('embeddings/node_embedding.kv')

In [37]:
dataset.x = torch.tensor(embedded_nodes.vectors, dtype=torch.float)
print(dataset)

Data(edge_attr=[50000], edge_index=[2, 50000], x=[13292, 16])


In [38]:
data = dataset
data.edge_type = torch.LongTensor(relations) #torch.zeros(((data.edge_index.size(-1)),)).long()
data.batch = torch.zeros((1, data.num_nodes), dtype=torch.int64).view(-1)
data.num_graphs = 1
num_features = dataset.x.shape[-1] 
relation_dimension = len(np.unique(relations))
print(f"no. unique relations: {relation_dimension}")
print(f"no. edge_type size: {data.edge_type.size()}")
print(f"no. relation size: {relations.shape}")
print(f"edge_index size: {data.edge_index.size()}")
print(f"min: {np.min(relations)}")
print(f"min: {np.max(relations)}")

no. unique relations: 916
no. edge_type size: torch.Size([50000])
no. relation size: (50000,)
edge_index size: torch.Size([2, 50000])
min: 0
min: 915


In [39]:
def load():
    # Read data and make dataframes
    name = ['entity', 'id']
    entity_id = pd.read_csv('./data/FB15k/entities.txt', sep='\t', header=None, names=name, engine='python')
    all_entities = entity_id['entity'].values

    name = ['relation', 'id']
    relation_id = pd.read_csv('./data/FB15k/relations.txt', sep='\t', header=None, names=name, engine='python')
    all_relations = relation_id['relation'].values

    # Read RDF Triples
    name = ['subject', 'object', 'relation']
    data = pd.read_csv('./data/FB15k/valid.txt', sep='\t', header=None, names=name, engine='python')

    SUBSAMPLE = 100

    subjects = data['subject'].values
    objects = data['object'].values
    relations = data['relation'].values
    # fit entity encoder
    le_entity = LabelEncoder()
    le_entity.fit(all_entities)

    # fit relationship encoder
    le_relation = LabelEncoder()
    le_relation.fit(all_relations)

    # string list to int array using LabelEncoder on complete data set
    subjects = le_entity.transform(subjects)
    objects = le_entity.transform(objects)
    relations = le_relation.transform(relations)

    # encode subsample (change range to 0-N)
    le_entity2 = LabelEncoder().fit(np.append(subjects,objects))
    le_relation2 = LabelEncoder().fit(relations)


    subjects = le_entity2.transform(subjects)
    objects = le_entity2.transform(objects)
    relations = le_relation2.transform(relations)


    edge_attributes = torch.tensor(relations, dtype=torch.float)
    edge_index = torch.tensor([subjects, objects], dtype=torch.long)
    unique_entities = torch.tensor(np.unique(edge_index.reshape(edge_index.shape[-1]*2, 1)), dtype=torch.float)
    dataset = Data(x=unique_entities, edge_attr=edge_attributes, edge_index=edge_index)
    
    from gensim.models import KeyedVectors
    embedded_nodes =  KeyedVectors.load_word2vec_format('embeddings/node_embedding.kv')
    dataset.x = torch.tensor(embedded_nodes.vectors, dtype=torch.float)
    print(dataset)
    
    data = dataset
    data.edge_type = torch.LongTensor(relations) #torch.zeros(((data.edge_index.size(-1)),)).long()
    data.batch = torch.zeros((1, data.num_nodes), dtype=torch.int64).view(-1)
    data.num_graphs = 1
    num_features = dataset.x.shape[-1] 
    relation_dimension = len(np.unique(relations))
    print(f"no. unique relations: {relation_dimension}")
    print(f"no. edge_type size: {data.edge_type.size()}")
    print(f"no. relation size: {relations.shape}")
    print(f"edge_index size: {data.edge_index.size()}")
    print(f"min: {np.min(relations)}")
    print(f"min: {np.max(relations)}")
    return dataset, le_entity, le_entity2, le_relation, le_relation2

In [40]:
def inverse_transform(sub, obj, rel):
    sub = le_entity.inverse_transform(le_entity2.inverse_transform([sub]))
    obj = le_entity.inverse_transform(le_entity2.inverse_transform([obj]))
    rel = le_relation.inverse_transform(le_relation2.inverse_transform([rel]))
    return sub[0], obj[0], rel[0]

In [41]:
inverse_transform(data.edge_index[0][0], data.edge_index[1][0], data.edge_type[0])

('/m/07pd_j', '/m/02l7c8', '/film/film/genre')

In [42]:
label_dict ={}
for n1, n2, ys in zip(data.edge_index[0], data.edge_index[1], data.edge_type):
    label_dict[int(n1), int(n2)] = int(ys)

In [43]:
def visualize(h, epoch=None, loss=None):
    plt.figure(figsize=(7,7))
    plt.xticks([])
    plt.yticks([])

    if torch.is_tensor(h):
        h = h.detach().cpu().numpy()
        plt.scatter(h[:, 0], h[:, 1], s=140, c=color, cmap="Set2")
        if epoch is not None and loss is not None:
            plt.xlabel(f'Epoch: {epoch}, Loss: {loss.item():.4f}', fontsize=16)
    else:
        nx.draw_networkx(h, pos=nx.spring_layout(h, seed=42), with_labels=True,
                          cmap="Set2")
        nx.draw_networkx_edge_labels(h, pos=nx.spring_layout(h, seed=42), edge_labels=label_dict, cmap="Set2")
    plt.show()

In [44]:
from torch_geometric.utils import to_networkx
G = to_networkx(dataset)

AttributeError: 'int' object has no attribute 'squeeze'

In [45]:
print(G.number_of_nodes())
print(dataset.num_nodes)

NameError: name 'G' is not defined

In [46]:
%matplotlib notebook
if len(unique_entities) > 200:
    print("you should probably not visualize huge node networks")
else:
    visualize(G) 

you should probably not visualize huge node networks


In [47]:
#!pip install sparqlwrapper
# https://rdflib.github.io/sparqlwrapper/

import sys
from SPARQLWrapper import SPARQLWrapper, JSON

def get_results(endpoint_url, query):
    user_agent = "WDQS-example Python/%s.%s" % (sys.version_info[0], sys.version_info[1])
    sparql = SPARQLWrapper(endpoint_url, agent=user_agent)
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    return sparql.query().convert()

def freebase_parser(freebase_id):
    endpoint_url = "https://query.wikidata.org/sparql"

    query = \
    '''SELECT ?sLabel WHERE { 
        ?s wdt:P646 "''' + freebase_id + '''".
        SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
    }
    LIMIT 1'''
    res = get_results(endpoint_url, query)
    if len(res['results']['bindings']) == 0:
        return "No result"
    else:
        return get_results(endpoint_url, query)['results']['bindings'][0]['sLabel']['value']

In [48]:
def rdf2txt(sub, obj, rel):
    sub, obj, rel = inverse_transform(sub,obj,rel)
    sub = freebase_parser(sub)
    obj = freebase_parser(obj)
    return sub, obj, str(rel)

In [49]:
rdf2txt(23, 2, 46)

('Racine',
 'Lubbock',
 '/award/award_nominated_work/award_nominations./award/award_nomination/award')

## Node2Vec

In [50]:
# Precompute probabilities and generate walks - **ON WINDOWS ONLY WORKS WITH workers=1**
node2vec = Node2Vec(G, dimensions=16, walk_length=15, num_walks=20, workers=1)  # Use temp_folder for big graphs

NameError: name 'G' is not defined

In [None]:
# Embed nodes
model = node2vec.fit(window=10, min_count=1, batch_words=4)  
# Any keywords acceptable by gensim.Word2Vec can be passed, 
# `dimensions` and `workers` are automatically passed
# (from the Node2Vec constructor)

In [None]:
def node2Text(node_id):
    freebase_id = le_entity.inverse_transform(le_entity2.inverse_transform([node_id]))
    return freebase_parser(freebase_id[0])

In [None]:
# Look for most similar nodes
NODE_ID = '2'
print(f"Most similar Nodes to {node2Text(int(NODE_ID))}")
for node in model.wv.most_similar(NODE_ID):
    sim_node_id, percentage = node
    print(node2Text(int(sim_node_id)), percentage) 

In [None]:
# Save embeddings for later use
import os
PATH = 'embeddings'
NODE_EMBEDDING_NAME = "node_embedding"
EMBEDDING_MODEL_NAME = "node_embedding_model"
if not os.path.exists(PATH):
    os.mkdir(PATH)
model.wv.save_word2vec_format(os.path.join(PATH, NODE_EMBEDDING_NAME + ".kv"))

In [None]:
# Save model for later use
model.save(os.path.join(PATH, EMBEDDING_MODEL_NAME + ".pkl"))

In [None]:
from gensim.models import KeyedVectors
embedded_nodes =  KeyedVectors.load_word2vec_format('embeddings/node_embedding.kv')


In [None]:
# Load embeddings 
embeddings = model.wv.load_word2vec_format(os.path.join(PATH, NODE_EMBEDDING_NAME + ".kv"))
embedded_model = model.wv.load(os.path.join(PATH, EMBEDDING_MODEL_NAME + ".pkl"))

In [None]:
# Look for most similar nodes
NODE_ID = '2'
print(f"Most similar Nodes to {node2Text(int(NODE_ID))}")
for node in embedded_model.wv.most_similar(NODE_ID):
    sim_node_id, percentage = node
    print(node2Text(int(sim_node_id)), percentage) 

In [51]:
embedded_x = torch.tensor(embeddings.vectors, dtype=torch.float)

NameError: name 'embeddings' is not defined

In [52]:
dataset.edge_index = torch.stack([row, col], dim=0)

NameError: name 'row' is not defined

In [53]:
data = GAE.split_edges(GAE, dataset)

AssertionError: 

In [54]:
l1 = data.test_pos_edge_index[0][0]
l2 = data.test_pos_edge_index[1][0]

AttributeError: 'Data' object has no attribute 'test_pos_edge_index'

In [55]:
print(dataset.edge_index[0].tolist().index(l1))
print(dataset.edge_index[1].tolist().index(l2))

NameError: name 'l1' is not defined

In [56]:
dataset

Data(batch=[13292], edge_attr=[50000], edge_index=[2, 50000], edge_type=[50000], num_graphs=[1], x=[13292, 16])

In [57]:
y[np.where(dataset.edge_index.T == torch.tensor([5217, 10644]))[0]]

NameError: name 'y' is not defined

In [58]:
import math as m

entity_id = pd.read_csv('./data/FB15k/entities.txt', sep='\t', header=None, names=['entity', 'id'], engine='python')
entity = entity_id['entity'].values

relation_id = pd.read_csv('./data/FB15k/relations.txt', sep='\t', header=None, names=['relation', 'id'], engine='python')
relation = relation_id['relation'].values

data = pd.read_csv('./data/FB15k/valid.txt', sep='\t', header=None, names=['subject', 'object', 'relation'], engine='python')
print('\tLoading FB15k training (valid file) data...')

dataset = label_encode_dataset(entity, relation, data)

# create node embeddings if none exists
if not os.path.exists("embeddings"):
    create_node_embedding(dataset)
embedded_nodes =  KeyedVectors.load_word2vec_format('embeddings/node_embedding.kv')

dataset.x = torch.tensor(embedded_nodes.vectors, dtype=torch.float)
data = GAE.split_edges(GAE, dataset)

	Loading FB15k training (valid file) data...


NameError: name 'label_encode_dataset' is not defined

In [None]:
print(dataset)
full_length = dataset.edge_index.shape[-1]
train_index = torch.tensor(dataset.edge_index[:, 0:m.floor(full_length*0.7)], dtype=torch.long)
train_attr_index = torch.tensor(dataset.edge_attr[0:m.floor(full_length*0.7)], dtype=torch.long)

val_index = torch.tensor(dataset.edge_index[:, m.floor(full_length*0.7):m.floor(full_length*0.9)], dtype=torch.long)
val_attr_index = torch.tensor(dataset.edge_attr[m.floor(full_length*0.7):m.floor(full_length*0.9)], dtype=torch.long)

test_index = torch.tensor(dataset.edge_index[:, m.floor(full_length*0.9):], dtype=torch.long)
test_attr_index = torch.tensor(dataset.edge_attr[m.floor(full_length*0.9):], dtype=torch.long)



dataset.edge_index = torch.cat([train_index, val_index, test_index], dim=1)
dataset.edge_attr = torch.cat([train_attr_index, val_attr_index, test_attr_index])

dataset.edge_train_mask = torch.cat([torch.ones((train_index.size(-1))),
                                  torch.zeros((val_index.size(-1))),
                                  torch.zeros((test_index.size(-1)))], dim=0).byte()
dataset.edge_val_mask = torch.cat([torch.zeros((train_index.size(-1))),
                                torch.ones((val_index.size(-1))),
                                torch.zeros((test_index.size(-1)))], dim=0).byte()
dataset.edge_test_mask = torch.cat([torch.zeros((train_index.size(-1))),
                                 torch.zeros((val_index.size(-1))),
                                 torch.ones((test_index.size(-1)))], dim=0).byte()

dataset.edge_train_attr_mask = torch.cat([torch.ones((train_attr_index.size(-1))),
                                  torch.zeros((val_attr_index.size(-1))),
                                  torch.zeros((test_attr_index.size(-1)))], dim=0).byte()
dataset.edge_val_attr_mask = torch.cat([torch.zeros((train_attr_index.size(-1))),
                                torch.ones((val_attr_index.size(-1))),
                                torch.zeros((test_attr_index.size(-1)))], dim=0).byte()
dataset.edge_test_attr_mask = torch.cat([torch.zeros((train_attr_index.size(-1))),
                                 torch.zeros((val_attr_index.size(-1))),
                                 torch.ones((test_attr_index.size(-1)))], dim=0).byte()

dataset.edge_type = torch.zeros(((dataset.edge_index.size(-1)),)).long()

dataset.batch = torch.zeros((1, dataset.num_nodes), dtype=torch.int64).view(-1)
dataset.num_graphs = 1
num_features = dataset.x.shape[-1] 
num_relations = max(np.unique(dataset.edge_attr)) + 1

In [59]:
np.where(dataset.edge_index.T == data.val_pos_edge_index.T[0])[0]

AttributeError: 'DataFrame' object has no attribute 'val_pos_edge_index'

In [60]:
pair = dataset.val_pos_edge_index.T[0]
obj = pair[1]
sub = pair[0]
rel = np.where(dataset.edge_index.T == data.val_pos_edge_index.T[0])[0]
print(obj, sub, rel)

AttributeError: 'Data' object has no attribute 'val_pos_edge_index'

In [61]:
rdf2txt(int(sub), int(obj), int(y[rel]))

NameError: name 'sub' is not defined

In [62]:
dataset.edge_attr[4877]

tensor(898.)

In [63]:
rdf2txt(8937, 4141, 341)

('American Pie', 'romance film', '/film/film/genre')

In [64]:
from torch_geometric.utils import to_networkx
from node2vec import Node2Vec
import os

def make_node_embeddings(dataset, path="embeddings", node_embedding_name="node_embeddings", embedding_model_name="node_embedding_model", dimensions=16, walk_length=15, num_walks=20, workers=1, window=10, min_count=1, batch_words=4):
    G = to_networkx(dataset)
   
    # Precompute probabilities and generate walks - **ON WINDOWS ONLY WORKS WITH workers=1**
    node2vec = Node2Vec(G, dimensions=dimensions, walk_length=walk_length, num_walks=num_walks, workers=workers)  # Use temp_folder for big graphs
    
    # Embed nodes
    model = node2vec.fit(window=window, min_count=min_count, batch_words=batch_words)  # Any keywords acceptable by gensim.Word2Vec can be passed, 
                                                                 # `dimensions` and `workers` are automatically passed
                                                                 # (from the Node2Vec constructor)
    
    # Save embeddings for later use
    if not os.path.exists(path):
        os.mkdir(path)
    model.wv.save_word2vec_format(os.path.join(path, node_embedding_name + ".kv"))
    model.save(os.path.join(path, embedding_model_name + ".pkl"))
    print(f"Saved embedding and model in the {path} folder")
    return model.vw.vectors

## Evaluating Graph Star Multi Relational

In [65]:
import torch
model = torch.load("output/FB15K_1024_Hid.pkl")

In [87]:
model

GraphStar(
  (fl): Linear(in_features=16, out_features=256, bias=True)
  (star_init): StarAttn(
    (Wq): Linear(in_features=256, out_features=256, bias=True)
    (Wk): Linear(in_features=256, out_features=256, bias=True)
    (Wv): Linear(in_features=256, out_features=256, bias=True)
    (sLayerNorm): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
  )
  (conv_list): ModuleList(
    (0): GraphStarConv(256, 256, heads=4)
    (1): GraphStarConv(256, 256, heads=4)
    (2): GraphStarConv(256, 256, heads=4)
  )
  (star_attn_list): ModuleList(
    (0): StarAttn(
      (Wq): Linear(in_features=256, out_features=256, bias=True)
      (Wk): Linear(in_features=256, out_features=256, bias=True)
      (Wv): Linear(in_features=256, out_features=256, bias=True)
      (sLayerNorm): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
    )
    (1): StarAttn(
      (Wq): Linear(in_features=256, out_features=256, bias=True)
      (Wk): Linear(in_features=256, out_features=256, bias=True)
      (

In [66]:
dataset, le_entity, le_entity2, le_relation, le_relation2 = load()

def inverse_transform(sub, obj, rel):
    sub = le_entity.inverse_transform(le_entity2.inverse_transform([sub]))
    obj = le_entity.inverse_transform(le_entity2.inverse_transform([obj]))
    rel = le_relation.inverse_transform(le_relation2.inverse_transform([rel]))
    return sub[0], obj[0], rel[0]

Data(edge_attr=[50000], edge_index=[2, 50000], x=[13292, 16])
no. unique relations: 916
no. edge_type size: torch.Size([50000])
no. relation size: (50000,)
edge_index size: torch.Size([2, 50000])
min: 0
min: 915


In [192]:
# Logits embedding [13292, 256] -> [unique nodes x hidden layer]
z = model.z
# Edge index between 2 nodes
edge_index = dataset.edge_index.T[100].T
# relation type
edge_type = dataset.edge_type[100]

In [193]:
model.rl

Linear(in_features=918, out_features=256, bias=True)

In [194]:
rdf2txt(edge_index[0], edge_index[1], edge_type)

('Syria', 'Damascus', '/location/country/administrative_divisions')

In [195]:
# updated in training (not after)
model.RW

Parameter containing:
tensor([[ 0.0461, -0.0483, -0.0591,  ..., -0.0494, -0.0815,  0.0566],
        [-0.0573, -0.0042,  0.0070,  ..., -0.0586, -0.0540, -0.0835],
        [-0.0400, -0.0135, -0.0529,  ..., -0.0017, -0.0051,  0.0231],
        ...,
        [ 0.0662,  0.0853,  0.0700,  ...,  0.0435, -0.0358, -0.0923],
        [-0.0007, -0.0064,  0.0640,  ..., -0.0492, -0.0112,  0.0284],
        [-0.0126,  0.0563, -0.0568,  ..., -0.0611, -0.0003, -0.0608]],
       requires_grad=True)

In [196]:
z2 = torch.sigmoid(z)

In [197]:
head = z[edge_index[0]]
relation =  model.RW
tail = z[edge_index[1]]

In [198]:
model.RW.size()

torch.Size([918, 256])

In [199]:
p.size()

torch.Size([918, 256])

In [200]:
# SCORING FUNCTION
p = head*relation*tail
score = torch.sigmoid(p.sum(dim=-1))

In [201]:
print(head.size())
print(relation.size())
print(tail.size())

torch.Size([256])
torch.Size([918, 256])
torch.Size([256])


In [202]:
pred = score.detach().numpy().argsort()[-10:][::-1]

In [203]:
pred

array([521, 719,  84, 646, 826, 618, 306, 285, 167, 169], dtype=int64)

In [204]:
for l in pred:
    print(rdf2txt(edge_index[0], edge_index[1], l))
# Recall precision on top 10.

('Syria', 'Damascus', '/location/location/partially_containedby')
('Syria', 'Damascus', '/people/person/spouse_s./people/marriage/type_of_union')
('Syria', 'Damascus', '/base/biblioness/bibs_location/country')
('Syria', 'Damascus', '/olympics/olympic_sport/athletes./olympics/olympic_athlete_affiliation/athlete')
('Syria', 'Damascus', '/time/event/locations')
('Syria', 'Damascus', '/music/performance_role/track_performances./music/track_contribution/role')
('Syria', 'Damascus', '/fictional_universe/fictional_character/occupation')
('Syria', 'Damascus', '/education/field_of_study/students_majoring./education/education/minor')
('Syria', 'Damascus', '/baseball/baseball_player/former_teams./baseball/baseball_historical_roster_position/team')
('Syria', 'Damascus', '/baseball/baseball_player/position_s')


In [83]:
# Logits embedding [13292, 256] -> [unique nodes x hidden layer]
z = model.z

def experiment(index=0):
    
    # Edge index between 2 nodes
    edge_index = dataset.edge_index.T[index].T
    # relation type
    edge_type = dataset.edge_type[index]
    
    h, t, r = rdf2txt(edge_index[0], edge_index[1], edge_type)
    print(f" \
        Original data: \n \
        Head: {h} \n \
        Relation: {r} \n \
        Tail: {t} \n")
    
    head = z[edge_index[0]]
    relation =  model.RW
    tail = z[edge_index[1]]
    p = head * relation * tail
    pred = int(round(np.argmax(p.detach().numpy())/256))
    
    h, t, r = rdf2txt(edge_index[0], edge_index[1], pred)
    print(f" \
        Predicted data: \n \
        Head: {h} \n \
        Relation: {r} \n \
        Tail: {t} \n")

In [84]:
experiment(1690)

         Original data: 
         Head: As Good as It Gets 
         Relation: /film/film/other_crew./film/film_crew_gig/film_crew_role 
         Tail: make-up artist 

         Predicted data: 
         Head: As Good as It Gets 
         Relation: /cvg/cvg_genre/games 
         Tail: make-up artist 



In [85]:
head = z[edge_index[0]]
relation =  model.RW[edge_type]
tail = z[edge_index[1]]
p = head * relation * tail

In [86]:
p.sum()

tensor(0.5769, grad_fn=<SumBackward0>)

## How to evaluate
All should use top 10 / 5 / 1
*  Zero - One f0-score (does it predict correctly or not)
*  Hierachical correctness (How far up the relationtree ? Does it get film/film or film/film/other_crew)
*  Top-K hits (Is the correct in label in top-K in predictions) (precision, recall, f0)
*  Split relationship and regex in pred relations (finding related relations)

Need to explain hierachical layout of freebase!
