In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from tf.fabric import Fabric
from tf.convert.walker import CV
import cProfile, pstats, io
from pstats import SortKey

In [3]:
# Relative directory handed to `Fabric(locations=...)`, both when the test set
# is built and when it is loaded again further down.
TF_PATH = '_temp/tf'

# Make test set

In [4]:
# Fabric handle on TF_PATH; it is passed to the walker (`CV(TF)`) below.
# silent=True presumably suppresses progress messages — TODO confirm.
TF = Fabric(locations=TF_PATH, silent=True)

In [5]:
# --- Size and shape of the synthetic corpus (read by `director`) -----------
nSlots = 400_000           # total number of slots to generate
chunkSize = 4              # a new chunk is opened every `chunkSize` slots
cats = ['m', 'f', 'n']     # values for the `cat` feature of the slots

# --- Arguments for `cv.walk` ------------------------------------------------
slotType = 'slot'

# Dataset-wide metadata.
generic = dict(
    name='test set for query strategy testing',
    compiler='Dirk Roorda',
)

# Text format and section configuration.
otext = {
    'fmt:text-orig-full': '{num}{cat} ',
    'sectionTypes': 'chunk',
    'sectionFeatures': 'num',
}

# Features whose values are integers.
intFeatures = {'num'}

# Per-feature metadata.
featureMeta = dict(
    num=dict(description='node number'),
    cat=dict(description='category: m f n'),
)

def director(cv):
  """Feed the synthetic corpus to the walker object `cv`.

  Generates `nSlots` slots. Whenever the slot index is a multiple of
  `chunkSize`, the chunk that is currently open is terminated and a new
  `chunk` node is started; its `num` feature is the 0-based chunk index.
  Every slot gets `num` (its 0-based index) and `cat` (cycling through `cats`).

  Only `cv.node`, `cv.slot`, `cv.feature` and `cv.terminate` are used.
  Returns nothing.
  """
  chunk = None
  for slotIndex in range(nSlots):
    chunkIndex, offset = divmod(slotIndex, chunkSize)
    if offset == 0:
      # On the first pass `chunk` is still None, so terminate(None) is issued once.
      cv.terminate(chunk)
      chunk = cv.node('chunk')
      cv.feature(chunk, num=chunkIndex)
    slot = cv.slot()
    cv.feature(slot, num=slotIndex, cat=cats[slotIndex % 3])
  # Close whatever chunk is still open after the last slot.
  cv.terminate(chunk)
    
# Build the test set: the walker calls `director` and turns the actions it
# issues into a dataset for the Fabric handle `TF`.
cv = CV(TF)

good = cv.walk(
    director,
    slotType,
    otext=otext,
    generic=generic,
    intFeatures=intFeatures,
    featureMeta=featureMeta,
)

# Fail fast: every later cell loads and queries this dataset, so a failed
# conversion must not pass silently and leave stale data to be tested.
assert good, 'cv.walk() reported failure; the generated test set is not usable'

  0.00s Importing data from walking through the source ...
   |     0.00s Preparing metadata... 
   |     0.00s No structure nodes will be set up
   |   SECTION   TYPES:    chunk
   |   SECTION   FEATURES: num
   |   STRUCTURE TYPES:    
   |   STRUCTURE FEATURES: 
   |   TEXT      FEATURES:
   |      |   text-orig-full       cat, num
   |     0.01s OK
   |     0.00s Following director... 
   |     1.43s "edge" actions: 0
   |     1.44s "feature" actions: 500000
   |     1.44s "node" actions: 100000
   |     1.44s "resume" actions: 0
   |     1.44s "slot" actions: 400000
   |     1.44s "terminate" actions: 100001
   |     100000 x "chunk" node 
   |     400000 x "slot" node  = slot type
   |     500000 nodes of all types
   |     1.51s OK
   |     0.00s checking for nodes and edges ... 
   |     0.00s OK
   |     0.00s checking features ... 
   |     0.11s OK
   |     0.00s reordering nodes ...
   |     0.09s Sorting 100000 nodes of type "chunk"
   |     0.23s Max node = 500000
   |   

# Load test set

In [6]:
# Open a fresh Fabric handle on the dataset generated above and load it.
TF = Fabric(locations=TF_PATH, silent='deep')
api = TF.loadAll()
# Put the API members into this notebook's namespace. `S` and `silentOff`,
# used below, are not defined anywhere else in the notebook, so they
# presumably come from this call — TODO confirm.
# NOTE(review): `docs` is not used in any of the cells shown here.
docs = api.makeAvailableIn(globals())
silentOff()

# Test the use of the `shallow` parameter of `S.search`

In [7]:
# Three-object template: a chunk, a slot with num=1 inside it, and a second
# slot related to the first one by `<`.
query = '''
chunk
  slot num=1
  < slot
'''

In [8]:
# Baseline: run the template without `shallow` and materialise all result tuples.
list(S.search(query))

[(400001, 2, 3), (400001, 2, 4)]

In [9]:
# Same template with shallow=2, to compare against the full result tuples above.
list(S.search(query, shallow=2))

[(400001, 2)]

In [10]:
# Six-slot template alternating the `<:` and `<` relations.
# NOTE(review): this value of `query` is never used — the next cell reassigns
# `query` before any search is run. Either run it or drop the cell.
query = '''
slot
<: slot
< slot
<: slot
< slot
<: slot
'''

In [11]:
# Five-object template used for the strategy/performance test below:
# a chunk with three named slots a, b, c, plus an independent slot s that is
# tied to them only through the relations a < s, s < b and s < c.
# (`=:` and `:=` presumably anchor a slot to the start / end of the chunk —
# TODO confirm against the search template reference.)
query = '''
chunk
  =: a:slot
  < b:slot
  < c:slot
  :=

s:slot

a < s
s < b
s < c
'''

In [12]:
# Prepare the search for `query`; results are retrieved afterwards with S.fetch().
S.study(query)

  0.00s Checking search template ...
  0.00s Setting up search space for 5 objects ...
  0.24s Constraining search space with 10 relations ...
  0.75s 	2 edges thinned
  0.75s Setting up retrieval plan with strategy small_choice_multi ...
  0.77s Ready to deliver results from 1100000 nodes
Iterate over S.fetch() to get the results
See S.showPlan() to interpret the results


In [13]:
# Print the retrieval plan of the studied query, including the detailed section.
S.showPlan(details=True)

Search with 5 objects and 8 relations
Results are instantiations of the following objects:
node  0-chunk                                         100000   choices
node  1-slot                                          100000   choices
node  2-slot                                          400000   choices
node  3-slot                                          100000   choices
node  4-slot                                          400000   choices
Performance parameters:
	yarnRatio            =    1.25
	tryLimitFrom         =      40
	tryLimitTo           =      40
Instantiations are computed along the following relations:
node                                  0-chunk         100000   choices
edge        0-chunk            [[     3-slot               1.0 choices
edge        3-slot             :=     0-chunk              0   choices
edge        0-chunk            =:     1-slot               1.0 choices (thinned)
edge        1-slot             ]]     0-chunk              0   choices
edge      

In [14]:
# Profile how long it takes to fetch the first two results of the studied query.
pr = cProfile.Profile()
pr.enable()
try:
    results = S.fetch(limit=2)
finally:
    # Always switch the profiler off again, even when fetch raises; otherwise
    # it would stay active and keep recording the cells that run afterwards.
    pr.disable()

# Render the collected statistics, ordered by cumulative time, into a string
# buffer and print that buffer.
s = io.StringIO()
sortby = SortKey.CUMULATIVE
ps = pstats.Stats(pr, stream=s).sort_stats(sortby)
ps.print_stats()
print(s.getvalue())

         2400209 function calls (2400162 primitive calls) in 0.976 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        2    0.000    0.000    1.395    0.698 /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/IPython/core/interactiveshell.py:3230(run_code)
        2    0.000    0.000    1.395    0.698 {built-in method builtins.exec}
        1    0.000    0.000    1.395    1.395 <ipython-input-14-f769f1ab4bfb>:3(<module>)
        1    0.000    0.000    1.395    1.395 /Users/dirk/github/annotation/text-fabric/tf/search/search.py:151(fetch)
        1    0.000    0.000    1.395    1.395 /Users/dirk/github/annotation/text-fabric/tf/search/searchexe.py:89(fetch)
        3    0.000    0.000    1.395    0.465 /Users/dirk/github/annotation/text-fabric/tf/search/stitch.py:683(deliver)
     50/3    0.000    0.000    1.395    0.465 /Users/dirk/github/annotation/text-fabric/tf/search/stitch.py:690(stitchO

In [15]:
# Show the result tuples that were fetched (limit=2) in the profiling cell.
print(results)

((400001, 1, 3, 4, 2), (400002, 5, 7, 8, 6))


In [16]:
# Count results of the studied query, reporting progress per single result and
# stopping at 10; the timestamps show the cost of each additional result.
S.count(progress=1, limit=10)

  0.00s Counting results per 1 up to 10 ...
   |     0.30s 1
   |     0.88s 2
   |     1.46s 3
   |     2.03s 4
   |     2.60s 5
   |     3.17s 6
   |     3.74s 7
   |     4.32s 8
   |     4.89s 9
   |     5.46s 10
  5.46s Done: 10 results
