In [21]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [154]:
from tf.fabric import Fabric
from tf.convert.walker import CV
import cProfile, pstats, io
from pstats import SortKey

In [23]:
TF_PATH = '_temp/tf'

# Make test set

In [24]:
TF = Fabric(locations=TF_PATH, silent=True)

In [142]:
slotType = 'slot'
generic = {
    'name': 'test set for query strategy testing',
    'compiler': 'Dirk Roorda',
}
otext = {
    'fmt:text-orig-full': '{num}{cat} ',
    'sectionTypes': 'chunk',
    'sectionFeatures': 'num',
}
intFeatures = {
  'num',
}
featureMeta = {
    'num': {
        'description': 'node number',
    },
    'cat': {
        'description': 'category: m f n',
    },
}

nSlots = 400000
chunkSize = 4
cats = ['m', 'f', 'n']

def director(cv):
  c = None
  for n in range(nSlots):
    if n % chunkSize == 0:
      cv.terminate(c)
      c = cv.node('chunk')
      cv.feature(c, num=n // chunkSize)
    s = cv.slot()
    cv.feature(s, num=n, cat=cats[n % 3])
  cv.terminate(c)
    
cv = CV(TF)

good = cv.walk(
    director,
    slotType,
    otext=otext,
    generic=generic,
    intFeatures=intFeatures,
    featureMeta=featureMeta,
)

  0.00s Importing data from walking through the source ...
   |     0.00s Preparing metadata... 
   |     0.00s No structure nodes will be set up
   |   SECTION   TYPES:    chunk
   |   SECTION   FEATURES: num
   |   STRUCTURE TYPES:    
   |   STRUCTURE FEATURES: 
   |   TEXT      FEATURES:
   |      |   text-orig-full       cat, num
   |     0.01s OK
   |     0.00s Following director... 
   |     1.65s "edge" actions: 0
   |     1.65s "feature" actions: 500000
   |     1.65s "node" actions: 100000
   |     1.65s "resume" actions: 0
   |     1.65s "slot" actions: 400000
   |     1.65s "terminate" actions: 100001
   |     100000 x "chunk" node 
   |     400000 x "slot" node  = slot type
   |     500000 nodes of all types
   |     1.71s OK
   |     0.00s checking for nodes and edges ... 
   |     0.00s OK
   |     0.00s checking features ... 
   |     0.12s OK
   |     0.00s reordering nodes ...
   |     0.10s Sorting 100000 nodes of type "chunk"
   |     0.25s Max node = 500000
   |   

# Load test set

In [165]:
TF = Fabric(locations=TF_PATH, silent='deep')
api = TF.loadAll()
docs = api.makeAvailableIn(globals())
silentOff()

In [187]:
query = '''
slot
<: slot
< slot
<: slot
< slot
<: slot
'''

In [203]:
query = '''
chunk
  =: a:slot
  b:slot
  :=

c:slot
<: d:slot

a < c
b > d
'''

In [216]:
S.study(query)

  0.00s Checking search template ...
  0.00s Setting up search space for 5 objects ...
  0.22s Constraining search space with 7 relations ...
  0.60s 	2 edges thinned
{frozenset({0, 1, 2}): {frozenset({3, 4}): [5]}, frozenset({3, 4}): {frozenset({0, 1, 2}): [6]}}
{frozenset({3, 4}): {frozenset({0, 1, 2}): [5]}, frozenset({0, 1, 2}): {frozenset({3, 4}): [6]}}
  0.60s complexity: 2.4e+11
2 internally bounded chunks
  0.60s Chunk 3,4 with 800000 nodes in its yarns
edge  3-slot          <:  4-slot          (     1.0 choices)
  0.60s Chunk 0,1,2 with 300000 nodes in its yarns
edge  1-slot          ]]  0-chunk         (     1.0 choices)
edge  0-chunk         =:  1-slot          (     1.0 choices (thinned))
edge  2-slot          ]]  0-chunk         (     1.0 choices)
edge  2-slot          :=  0-chunk         (     0.2 choices (thinned))
  0.60s Edges between chunks:
  0.61s   from 0,1,2 to 3,4
edge  1-slot          <   3-slot          (200000.0 choices)
edge  4-slot          <   2-slot       

In [214]:
S.showPlan(details=True)

Search with 5 objects and 7 relations
Results are instantiations of the following objects:
node  0-chunk                             (100000   choices)
node  1-slot                              (100000   choices)
node  2-slot                              (100000   choices)
node  3-slot                              (400000   choices)
node  4-slot                              (400000   choices)
Performance parameters:
	yarnRatio            =    1.25
	tryLimitFrom         =      40
	tryLimitTo           =      40
Instantiations are computed along the following relations:
node                      0-chunk         (100000   choices)
edge  0-chunk         [[  2-slot          (     1.0 choices)
edge  2-slot          :=  0-chunk         (     1.0 choices (thinned))
edge  0-chunk         [[  1-slot          (     1.0 choices)
edge  1-slot          =:  0-chunk         (     1.0 choices (thinned))
edge  1-slot          <   3-slot          (200000.0 choices)
edge  3-slot          <:  4-slot       

In [201]:
pr = cProfile.Profile()
pr.enable()
results = S.fetch(limit=2)
pr.disable()
s = io.StringIO()
sortby = SortKey.CUMULATIVE
ps = pstats.Stats(pr, stream=s).sort_stats(sortby)
ps.print_stats()
print(s.getvalue())

         4000107 function calls (3200080 primitive calls) in 1.377 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        2    0.000    0.000    1.377    0.689 /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/IPython/core/interactiveshell.py:3230(run_code)
        2    0.000    0.000    1.377    0.689 {built-in method builtins.exec}
        1    0.000    0.000    1.377    1.377 <ipython-input-201-f769f1ab4bfb>:3(<module>)
        1    0.000    0.000    1.377    1.377 /Users/dirk/github/annotation/text-fabric/tf/search/search.py:151(fetch)
        1    0.000    0.000    1.377    1.377 /Users/dirk/github/annotation/text-fabric/tf/search/searchexe.py:90(fetch)
        3    0.000    0.000    1.377    0.459 /Users/dirk/github/annotation/text-fabric/tf/search/stitch.py:519(deliver)
 800030/3    1.011    0.000    1.377    0.459 /Users/dirk/github/annotation/text-fabric/tf/search/stitch.py:526(stitch

In [191]:
print(results)

((1, 2, 3, 4, 5, 6), (1, 2, 3, 4, 6, 7), (1, 2, 3, 4, 7, 8), (1, 2, 3, 4, 8, 9), (1, 2, 3, 4, 9, 10), (1, 2, 3, 4, 10, 11), (1, 2, 3, 4, 11, 12), (1, 2, 3, 4, 12, 13), (1, 2, 3, 4, 13, 14), (1, 2, 3, 4, 14, 15))


In [150]:
S.count(progress=1, limit=10)

  0.00s Counting results per 1 up to 10 ...
   |     0.00s 1
   |     0.00s 2
   |     0.43s 3
   |     0.43s 4
   |     0.86s 5
   |     0.86s 6
   |     1.28s 7
   |     1.28s 8
   |     1.70s 9
   |     1.70s 10
  1.70s Done: 10 results
