In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from tf.fabric import Fabric
from tf.convert.walker import CV
import cProfile, pstats, io
from pstats import SortKey

In [3]:
# Location of the test dataset; passed as `locations` to every Fabric(...) call
# in this notebook.
TF_PATH = '_temp/tf'

# Make test set

In [13]:
# Fabric instance on TF_PATH, created with silent=True; it is handed to the CV
# walker that generates the test set.
TF = Fabric(locations=TF_PATH, silent=True)

In [34]:
# Metadata for the generated test set. Every name below is passed to cv.walk()
# under a keyword of the same name (slotType is passed positionally).

# Node type that serves as slot type.
slotType = 'slot'

# Passed to cv.walk() as `generic`.
generic = dict(
    name='test set for query strategy testing',
    compiler='Dirk Roorda',
)

# Text configuration: one text format built from `num` and `cat`, and a single
# section level (`chunk`, identified by feature `num`).
otext = {
    'fmt:text-orig-full': '{num}{cat} ',
    'sectionTypes': 'chunk',
    'sectionFeatures': 'num',
}

# Passed to cv.walk() as `intFeatures` (presumably: features with integer
# values -- confirm against the walker documentation).
intFeatures = {'num'}

# Per-feature metadata: a description for each feature the director sets.
featureMeta = dict(
    num=dict(description='node number'),
    cat=dict(description='category: m f n'),
)

# Size and shape of the generated test set.
nSlots = 40000  # total number of slot nodes to generate
chunkSize = 4  # number of consecutive slots grouped under one chunk node
cats = ['m', 'f', 'n']  # values of feature `cat`, assigned to slots cyclically


def director(cv):
  """Generate the test set by issuing node/slot/feature actions on `cv`.

  Creates `nSlots` slots, grouped into `chunk` nodes of `chunkSize`
  consecutive slots each.

  * every chunk gets feature `num` = its sequence number (starting at 0);
  * every slot gets feature `num` = its sequence number (starting at 0)
    and feature `cat` = the entries of `cats`, cycled.

  Parameters
  ----------
  cv: object
    The walker; must provide `node`, `slot`, `feature` and `terminate`.

  Returns
  -------
  None
  """
  nCats = len(cats)  # cycle over all categories, whatever their number
  c = None  # the chunk that is currently open, if any

  for n in range(nSlots):
    if n % chunkSize == 0:
      # A new chunk starts here: close the previous one first.
      # Before the very first chunk there is nothing to close.
      if c is not None:
        cv.terminate(c)
      c = cv.node('chunk')
      cv.feature(c, num=n // chunkSize)
    s = cv.slot()
    cv.feature(s, num=n, cat=cats[n % nCats])

  # Close the last chunk (there is none when nSlots is 0).
  if c is not None:
    cv.terminate(c)
    
# Walker on top of the Fabric instance; it will execute `director`.
cv = CV(TF)

# Run the director with the metadata defined above.
# NOTE(review): `good` is not inspected in any later visible cell; presumably it
# is a success flag -- confirm, and consider checking it before loading the data.
good = cv.walk(
    director,
    slotType,
    otext=otext,
    generic=generic,
    intFeatures=intFeatures,
    featureMeta=featureMeta,
)

  0.00s Importing data from walking through the source ...
   |     0.00s Preparing metadata... 
   |     0.00s No structure nodes will be set up
   |   SECTION   TYPES:    chunk
   |   SECTION   FEATURES: num
   |   STRUCTURE TYPES:    
   |   STRUCTURE FEATURES: 
   |   TEXT      FEATURES:
   |      |   text-orig-full       cat, num
   |     0.00s OK
   |     0.00s Following director... 
   |     0.14s "edge" actions: 0
   |     0.14s "feature" actions: 50000
   |     0.14s "node" actions: 10000
   |     0.14s "resume" actions: 0
   |     0.14s "slot" actions: 40000
   |     0.14s "terminate" actions: 10001
   |      10000 x "chunk" node 
   |      40000 x "slot" node  = slot type
   |      50000 nodes of all types
   |     0.15s OK
   |     0.00s checking for nodes and edges ... 
   |     0.00s OK
   |     0.00s checking features ... 
   |     0.01s OK
   |     0.00s reordering nodes ...
   |     0.05s Sorting 10000 nodes of type "chunk"
   |     0.06s Max node = 50000
   |     0.06

# Load test set

In [35]:
# Open the dataset at TF_PATH again (rebinding TF) and load all of it.
TF = Fabric(locations=TF_PATH, silent='deep')
api = TF.loadAll()
# Put the API names into the notebook namespace; the later cells rely on `S`
# and `silentOff`, which are presumably injected here -- confirm.
docs = api.makeAvailableIn(globals())
silentOff()

# Test use of shallow

In [36]:
# Search template with three objects: a chunk, a slot with num=1, and another
# slot related to it by `<`. Per the results shown below, the last slot is
# one that follows the num=1 slot inside the same chunk.
# The template is whitespace-sensitive: do not reformat the string.
query = '''
chunk
  slot num=1
  < slot
'''

In [37]:
# Run the query and collect all results in a list; each result is a tuple with
# one node per object in the template.
list(S.search(query))

BOUNDDIR {2: 1}


[(40001, 2, 3), (40001, 2, 4)]

In [38]:
# Same query with shallow=2: per the output, results are reduced to their first
# two members, and results that coincide after that reduction appear only once.
list(S.search(query, shallow=2))

BOUNDDIR {2: 1}


[(40001, 2)]

In [39]:
# Template with six slots, related alternately by `<:` and `<`.
# NOTE(review): this value is never used -- the next cell assigns `query` again
# before any search is run. Either run a search on it here or drop the cell.
query = '''
slot
<: slot
< slot
<: slot
< slot
<: slot
'''

In [40]:
# Template with five objects: a chunk with three named slots a, b, c inside it,
# plus a free-standing slot s that is tied to them by the relations at the end
# (a < s, s < b, s < c).
# Per the plan printed by S.showPlan() below, `=:` ties a to the chunk and the
# trailing `:=` ties c to the chunk (presumably: same first slot / same last
# slot -- confirm against the search template documentation).
# The template is whitespace-sensitive: do not reformat the string.
query = '''
chunk
  =: a:slot
  < b:slot
  < c:slot
  :=

s:slot

a < s
s < b
s < c
'''

In [41]:
# Prepare the search (check the template, build the retrieval plan) without
# delivering results yet; results are obtained later through S.fetch().
S.study(query)

  0.00s Checking search template ...
  0.00s Setting up search space for 5 objects ...
  0.03s Constraining search space with 10 relations ...
  0.10s 	2 edges thinned
  0.10s Setting up retrieval plan with strategy small_choice_multi ...
BOUNDDIR {5: 1, 3: -1, 11: {0: -1, 1: -1, 2: 1}}
  0.12s Ready to deliver results from 110000 nodes
Iterate over S.fetch() to get the results
See S.showPlan() to interpret the results


In [42]:
# Print the retrieval plan of the search that was just studied, including the
# per-object and per-relation details.
S.showPlan(details=True)

Search with 5 objects and 8 relations
Results are instantiations of the following objects:
node  0-chunk                                          10000   choices
node  1-slot                                           10000   choices
node  2-slot                                           40000   choices
node  3-slot                                           10000   choices
node  4-slot                                           40000   choices
Performance parameters:
	yarnRatio            =    1.25
	tryLimitFrom         =      40
	tryLimitTo           =      40
Instantiations are computed along the following relations:
node                                  0-chunk          10000   choices
edge        0-chunk            [[     3-slot               1.0 choices
edge        3-slot             :=     0-chunk              0   choices
edge        0-chunk            =:     1-slot               1.0 choices (thinned)
edge        1-slot             ]]     0-chunk              0   choices
edge      

In [43]:
# Profile the delivery of the first two results of the studied search and
# print the statistics, ordered by cumulative time.
pr = cProfile.Profile()
pr.enable()
try:
  results = S.fetch(limit=2)
finally:
  # Always switch the profiler off, also when fetch() raises; otherwise it
  # stays active and keeps recording whatever runs in later cells.
  pr.disable()

# Collect the report in a string buffer so that it is printed in one go.
s = io.StringIO()
sortby = SortKey.CUMULATIVE
ps = pstats.Stats(pr, stream=s).sort_stats(sortby)
ps.print_stats()
print(s.getvalue())

         1030027 function calls (890027 primitive calls) in 2.192 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        2    0.000    0.000    2.192    1.096 /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/IPython/core/interactiveshell.py:3230(run_code)
        2    0.000    0.000    2.192    1.096 {built-in method builtins.exec}
        1    0.000    0.000    2.192    2.192 <ipython-input-43-f769f1ab4bfb>:3(<module>)
        1    0.000    0.000    2.192    2.192 /Users/dirk/github/annotation/text-fabric/tf/search/search.py:151(fetch)
        1    0.000    0.000    2.192    2.192 /Users/dirk/github/annotation/text-fabric/tf/search/searchexe.py:89(fetch)
        1    0.000    0.000    2.192    2.192 /Users/dirk/github/annotation/text-fabric/tf/search/stitch.py:692(deliver)
 140001/1    1.989    0.000    2.192    2.192 /Users/dirk/github/annotation/text-fabric/tf/search/stitch.py:701(stitchOn

In [10]:
# Show the result tuples fetched in the profiling cell.
# NOTE(review): the saved output lists chunk node 400001, whereas the dataset
# built in this notebook has 40000 slots (chunk nodes start at 40001). The
# output looks like it stems from an earlier run with another size -- re-run.
print(results)

((400001, 1, 4, 2, 3), (400002, 5, 8, 6, 7))


In [150]:
# Count the results of the studied search, reporting progress after every
# single result and stopping at 10 results.
S.count(progress=1, limit=10)

  0.00s Counting results per 1 up to 10 ...
   |     0.00s 1
   |     0.00s 2
   |     0.43s 3
   |     0.43s 4
   |     0.86s 5
   |     0.86s 6
   |     1.28s 7
   |     1.28s 8
   |     1.70s 9
   |     1.70s 10
  1.70s Done: 10 results
