In [1]:
# --- start make file run from another dir ---
#
# Note: File should be run from XKE root dir. E.g.:
#
#       $ cd Projects/XKE
#       $ python examples/emb_grid_search/grid_search_TransE_FB13.py
#
import os, sys
file_ = os.path.join(os.path.abspath(sys.path[0])) # use this when inside a jupyter notebook
# file_ = __file__ # use this when running from command line
xke_root = os.path.abspath(os.path.join(file_, "../"))
sys.path.insert(0, xke_root)
# --- end make file run from another dir ---

In [43]:
%load_ext autoreload
%autoreload 2

from sfe.sfe2 import Graph, SFE
import pandas as pd
import numpy as np
import os, time

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [89]:
def debug_print(list_of_els):
    """Prints a list of elements using their string method."""
    l = []
    for n in list_of_els:
        l.append(n.__str__())
    print(l)

def print_node_seqs(node_seqs):
    """Prints a node sequences dict."""
    for node,seqs in node_seqs.iteritems():
        print "{}:".format(node)
        for seq in seqs: debug_print(seq)
        print
        
def print_bfs_edge_seqs_res(res):
    for end_node,d in res.iteritems():
        print "{}:".format(end_node)
        for key,val in d.iteritems():
            print "\t",
            for node in key:
                print node,
            print "\t", val

# Graph #1

In [4]:
data = [
    ['A', 'r1', 'B'],
    ['A', 'r2', 'D'],
    ['B', 'r3', 'C'],
    ['D', 'r5', 'Z'],
    ['E', 'r7', 'D'],
    ['F', 'r3', 'E'],
    ['Z', 'r1', 'F'],
    ['Z', 'r2', 'B'],
    ['Z', 'r4', 'C'],
    ['E', 'r3', 'H'],
    ['H', 'r4', 'I'],
    ['I', 'r5', 'J'],
]

data = pd.DataFrame(data, columns=['head','relation','tail'])
data

Unnamed: 0,head,relation,tail
0,A,r1,B
1,A,r2,D
2,B,r3,C
3,D,r5,Z
4,E,r7,D
5,F,r3,E
6,Z,r1,F
7,Z,r2,B
8,Z,r4,C
9,E,r3,H


In [5]:
# build graph in object representation
start_time = time.time()
g = Graph()
g.partial_build_from_df(data)
print("Elapsed time: {}".format(time.time() - start_time))

Elapsed time: 0.00382208824158


In [128]:
# run SFE
sfe = SFE(g)
sfe.max_depth = 3
paths = sfe.search_paths('A', 'I')
# feats = sfe.generate_features(data, max_depth=2)

paths

time get nodes: 4.05311584473e-06
time to perform BFS on both nodes: 0.000119924545288
time to merge edge sequences: 4.60147857666e-05


{('r1', '_r2', '_r5', '_r7', 'r3', 'r4'),
 ('r1', '_r2', 'r1', 'r3', 'r3', 'r4'),
 ('r2', '_r7', 'r3', 'r4'),
 ('r2', 'r5', 'r1', 'r3', 'r3', 'r4')}

In [70]:
res = sfe.bfs_edge_seqs(sfe.graph.get_node('D'))

here for Node(Z)
curent edge seq ()
here for Node(C)
curent edge seq ('r5',)
here for Node(F)
curent edge seq ('r5',)
here for Node(B)
curent edge seq ('r5',)
here for Node(H)
curent edge seq ('_r7',)
here for Node(B)
curent edge seq ('_r2',)


In [90]:
print_bfs_edge_seqs_res(res)

Node(A):
	Node(D) Node(A) 	set([('_r2',)])
Node(B):
	Node(D) Node(Z) Node(B) 	set([('r5', 'r2')])
	Node(D) Node(A) Node(B) 	set([('_r2', 'r1')])
Node(Z):
	Node(D) Node(Z) 	set([('r5',)])
Node(C):
	Node(D) Node(Z) Node(C) 	set([('r5', 'r4')])
Node(E):
	Node(D) Node(E) 	set([('_r7',)])
Node(F):
	Node(D) Node(Z) Node(F) 	set([('r5', 'r1')])
	Node(D) Node(E) Node(F) 	set([('_r7', '_r3')])
Node(H):
	Node(D) Node(E) Node(H) 	set([('_r7', 'r3')])


# Data Zig (Graph #2)

In [15]:
data_zig = [
    ['s', 'r1', '1'],
    ['s', 'r4', '2'],
    ['1', 'r2', '2'],
    ['1', 'r5', 'e'],
    ['2', 'r3', 'e'],
]
data_zig = pd.DataFrame(data_zig, columns=['head', 'relation', 'tail'])
data_zig

Unnamed: 0,head,relation,tail
0,s,r1,1
1,s,r4,2
2,1,r2,2
3,1,r5,e
4,2,r3,e


In [61]:
# build graph in object representation
start_time = time.time()
g = Graph()
g.partial_build_from_df(data_zig)
print("Elapsed time: {}".format(time.time() - start_time))

Elapsed time: 0.00170493125916


In [17]:
sfe = SFE(g)

In [47]:
res = sfe.bfs_node_seqs(sfe.graph.get_node('s'), 1)

In [48]:
debug_print(res)

['Node(1)', 'Node(2)']


In [49]:
# print ending nodes and their respective node sequences for a bfs_node_seqs
for end_node,paths in res.iteritems():
    print end_node
    for path in paths:
        debug_print(path)
    print 

Node(1)
['Node(s)', 'Node(1)']

Node(2)
['Node(s)', 'Node(2)']



In [58]:
head = sfe.graph.get_node('s')
tail = sfe.graph.get_node('e')
head_node_seqs = sfe.bfs_node_seqs(head, 10)
tail_node_seqs = sfe.bfs_node_seqs(tail, 10)
node_seqs = sfe.merge_node_sequences(head, tail, head_node_seqs, tail_node_seqs)
edge_seqs = sfe.get_edge_seqs(node_seqs)
paths = sfe.get_paths(node_seqs)

# for seq in node_seqs:
#     debug_print(seq)
# for seq in edge_seqs:
# #     print seq
# #     print '======='
# #     for e in seq:
# #         print e
#     debug_print(seq)
print paths

set([('r4', '_r2', 'r5'), ('r1', 'r5'), ('r1', 'r2', 'r3'), ('r4', 'r3')])


In [14]:
paths = sfe.search_paths('A', 'B', 3)
paths

KeyError: 'A'

In [9]:
start_time = time.time()

feats = sfe.generate_features(data_zig, max_depth=1)
feats = next(feats)
    
print("Elapsed time: {}".format(time.time() - start_time))

KeyError: '1'

### Features

In [None]:
for idx,f_ in enumerate(feats):
    f = f_[1]
    row = data_zig.iloc[idx]
    print(row['head'], row['relation'], row['tail'], f)

### Paths

# Debug

In [34]:
s1 = set([1,2,3])
s2 = set([2,3,4])
s3 = set([3,4,5])

In [None]:
s1.difference(s2)

In [None]:
s1.union(set([44,66,99]))

In [None]:
s1

In [None]:
s1.__name__

In [None]:
d = {}
d['f'] = d.get('f', []) + ['fasjpodfijjj']

In [None]:
d

In [None]:
'fy' in d

In [105]:
sfe.graph.nodes

{'A': <sfe.sfe.Node at 0x7fe1bdc5c250>,
 'B': <sfe.sfe.Node at 0x7fe1bdc782d0>,
 'C': <sfe.sfe.Node at 0x7fe1bdc5c550>,
 'D': <sfe.sfe.Node at 0x7fe1bdc5ce50>,
 'E': <sfe.sfe.Node at 0x7fe1bdc5c450>,
 'F': <sfe.sfe.Node at 0x7fe1bdc5c490>,
 'H': <sfe.sfe.Node at 0x7fe1bdf26c90>,
 'I': <sfe.sfe.Node at 0x7fe1bdf26c10>,
 'J': <sfe.sfe.Node at 0x7fe1bdf26d50>,
 'Z': <sfe.sfe.Node at 0x7fe1bdc5cad0>}

In [107]:
A = sfe.graph.get_node('A')

In [113]:
A.edge_fan_out

{'r1': 1, 'r2': 1}

In [117]:
sum(A.edge_fan_out.values())

2

In [12]:
node = sfe.graph.get_node('D')

In [13]:
node.out_edge2neighbors

{'r5': {<sfe.sfe2.Node at 0x7fe0ac923d10>}}

In [14]:
node.in_edge2neighbors

{'r2': {<sfe.sfe2.Node at 0x7fe0d54415d0>},
 'r7': {<sfe.sfe2.Node at 0x7fe0ac28bf90>}}

In [15]:
node.edge_fan_out

{'r2': 1, 'r5': 1, 'r7': 1}

In [16]:
node.fan_out

3

In [17]:
node.name

'D'