In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from tf.fabric import Fabric
from tf.convert.walker import CV
import cProfile, pstats, io
from pstats import SortKey

In [3]:
TF_PATH = '_temp/tf'

# Make test set

In [4]:
TF = Fabric(locations=TF_PATH, silent=True)

In [5]:
slotType = 'slot'
generic = {
    'name': 'test set for query strategy testing',
    'compiler': 'Dirk Roorda',
}
otext = {
    'fmt:text-orig-full': '{num}{cat} ',
    'sectionTypes': 'chunk',
    'sectionFeatures': 'num',
}
intFeatures = {
  'num',
}
featureMeta = {
    'num': {
        'description': 'node number',
    },
    'cat': {
        'description': 'category: m f n',
    },
}

nSlots = 400000
chunkSize = 4
cats = ['m', 'f', 'n']

def director(cv):
  c = None
  for n in range(nSlots):
    if n % chunkSize == 0:
      cv.terminate(c)
      c = cv.node('chunk')
      cv.feature(c, num=n // chunkSize)
    s = cv.slot()
    cv.feature(s, num=n, cat=cats[n % 3])
  cv.terminate(c)
    
cv = CV(TF)

good = cv.walk(
    director,
    slotType,
    otext=otext,
    generic=generic,
    intFeatures=intFeatures,
    featureMeta=featureMeta,
)

  0.00s Importing data from walking through the source ...
   |     0.00s Preparing metadata... 
   |     0.00s No structure nodes will be set up
   |   SECTION   TYPES:    chunk
   |   SECTION   FEATURES: num
   |   STRUCTURE TYPES:    
   |   STRUCTURE FEATURES: 
   |   TEXT      FEATURES:
   |      |   text-orig-full       cat, num
   |     0.01s OK
   |     0.00s Following director... 
   |     1.43s "edge" actions: 0
   |     1.44s "feature" actions: 500000
   |     1.44s "node" actions: 100000
   |     1.44s "resume" actions: 0
   |     1.44s "slot" actions: 400000
   |     1.44s "terminate" actions: 100001
   |     100000 x "chunk" node 
   |     400000 x "slot" node  = slot type
   |     500000 nodes of all types
   |     1.51s OK
   |     0.00s checking for nodes and edges ... 
   |     0.00s OK
   |     0.00s checking features ... 
   |     0.11s OK
   |     0.00s reordering nodes ...
   |     0.09s Sorting 100000 nodes of type "chunk"
   |     0.23s Max node = 500000
   |   

# Load test set

In [4]:
TF = Fabric(locations=TF_PATH, silent='deep')
api = TF.loadAll()
docs = api.makeAvailableIn(globals())
silentOff()

# Test use of shallow

In [5]:
query = '''
chunk
  slot num=1
  < slot
'''

In [6]:
list(S.search(query))

[(400001, 2, 3), (400001, 2, 4)]

In [7]:
list(S.search(query, shallow=2))

[(400001, 2)]

In [8]:
query = '''
slot
<: slot
< slot
<: slot
< slot
<: slot
'''

In [35]:
query = '''
chunk
  =: a:slot
  < b:slot
  < c:slot
  :=

s:slot

a < s
s < b
s < c
'''

# Main test1

This query template consists of a `chunk` and its first and last nodes,
and an independent slot that is constrained between those nodes.

In [37]:
query = '''
chunk
  =: a:slot
  < c:slot
  :=

s:slot

a < s
s < c
'''

First we run it with a few old strategies.
The strategies are not really documented, except from 
comments in the code
because they are an implementation detail.
In case you're interested, click the strategy names to go to the code:

* [`small_choice_first`](https://github.com/annotation/text-fabric/blob/85db305f357466d4735edc7aea4cdfaae6ef6774/tf/search/stitch.py#L152-L219)
* [`small_choice_multi`](https://github.com/annotation/text-fabric/blob/85db305f357466d4735edc7aea4cdfaae6ef6774/tf/search/stitch.py#L222-L347)
* [`by_yarn_size`](https://github.com/annotation/text-fabric/blob/85db305f357466d4735edc7aea4cdfaae6ef6774/tf/search/stitch.py#L350-L425)

The third one `by_yarn_size` is virtually identical for the kind of queries we are testing here.
So we concentrate on the first two.

When we run the experiments, we do these steps:

* study
* show plan
* fetch 10 results under a profiler and collect statistics

## Strategy: small choice first

In [50]:
S.study(query, strategy='small_choice_first')

  0.00s Checking search template ...
  0.00s Setting up search space for 4 objects ...
  0.18s Constraining search space with 7 relations ...
  0.52s 	2 edges thinned
  0.52s Setting up retrieval plan with strategy small_choice_first ...
  0.53s Ready to deliver results from 700000 nodes
Iterate over S.fetch() to get the results
See S.showPlan() to interpret the results


In [51]:
S.showPlan(details=True)

Search with 4 objects and 7 relations
Results are instantiations of the following objects:
node  0-chunk                                         100000   choices
node  1-slot                                          100000   choices
node  2-slot                                          100000   choices
node  3-slot                                          400000   choices
Performance parameters:
	yarnRatio            =    1.25
	tryLimitFrom         =      40
	tryLimitTo           =      40
Instantiations are computed along the following relations:
node                                  0-chunk         100000   choices
edge        0-chunk            :=     2-slot               1.0 choices (thinned)
edge        0-chunk            [[     2-slot               0   choices
edge        0-chunk            [[     1-slot               1.0 choices
edge        1-slot             =:     0-chunk              0   choices
edge        1-slot             <      2-slot               0   choices
edge      

In [52]:
pr = cProfile.Profile()
pr.enable()
results = S.fetch(limit=10)
pr.disable()
s = io.StringIO()
sortby = SortKey.CUMULATIVE
ps = pstats.Stats(pr, stream=s).sort_stats(sortby)
ps.print_stats()
print(s.getvalue())

         8000242 function calls (6400148 primitive calls) in 2.501 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        2    0.000    0.000    2.500    1.250 /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/IPython/core/interactiveshell.py:3230(run_code)
        2    0.000    0.000    2.500    1.250 {built-in method builtins.exec}
        1    0.000    0.000    2.500    2.500 <ipython-input-52-87271d1c4549>:3(<module>)
        1    0.000    0.000    2.500    2.500 /Users/dirk/github/annotation/text-fabric/tf/search/search.py:151(fetch)
        1    0.000    0.000    2.500    2.500 /Users/dirk/github/annotation/text-fabric/tf/search/searchexe.py:89(fetch)
       11    0.000    0.000    2.500    0.227 /Users/dirk/github/annotation/text-fabric/tf/search/stitch.py:683(deliver)
1600105/11    1.723    0.000    2.500    0.227 /Users/dirk/github/annotation/text-fabric/tf/search/stitch.py:690(stitch

In [53]:
print('\n'.join(str(r) for r in results))

(400001, 1, 4, 2)
(400001, 1, 4, 3)
(400002, 5, 8, 6)
(400002, 5, 8, 7)
(400003, 9, 12, 10)
(400003, 9, 12, 11)
(400004, 13, 16, 14)
(400004, 13, 16, 15)
(400005, 17, 20, 18)
(400005, 17, 20, 19)


In [54]:
S.count(progress=1, limit=50)

  0.00s Counting results per 1 up to 50 ...
   |     0.00s 1
   |     0.00s 2
   |     0.35s 3
   |     0.35s 4
   |     0.69s 5
   |     0.69s 6
   |     1.03s 7
   |     1.03s 8
   |     1.36s 9
   |     1.36s 10
   |     1.70s 11
   |     1.70s 12
   |     2.06s 13
   |     2.06s 14
   |     2.48s 15
   |     2.48s 16
   |     2.84s 17
   |     2.84s 18
   |     3.18s 19
   |     3.18s 20
   |     3.51s 21
   |     3.51s 22
   |     3.85s 23
   |     3.85s 24
   |     4.18s 25
   |     4.19s 26
   |     4.52s 27
   |     4.53s 28
   |     4.86s 29
   |     4.86s 30
   |     5.20s 31
   |     5.20s 32
   |     5.54s 33
   |     5.54s 34
   |     5.88s 35
   |     5.88s 36
   |     6.33s 37
   |     6.33s 38
   |     6.71s 39
   |     6.71s 40
   |     7.04s 41
   |     7.05s 42
   |     7.47s 43
   |     7.47s 44
   |     7.85s 45
   |     7.85s 46
   |     8.20s 47
   |     8.20s 48
   |     8.53s 49
   |     8.53s 50
  8.53s Done: 50 results


## Strategy: small choice multi

In [73]:
S.study(query, strategy='small_choice_multi')

  0.00s Checking search template ...
  0.00s Setting up search space for 4 objects ...
  0.17s Constraining search space with 7 relations ...
  0.53s 	2 edges thinned
  0.53s Setting up retrieval plan with strategy small_choice_multi ...
  0.55s Ready to deliver results from 700000 nodes
Iterate over S.fetch() to get the results
See S.showPlan() to interpret the results


In [74]:
S.showPlan(details=True)

Search with 4 objects and 6 relations
Results are instantiations of the following objects:
node  0-chunk                                         100000   choices
node  1-slot                                          100000   choices
node  2-slot                                          100000   choices
node  3-slot                                          400000   choices
Performance parameters:
	yarnRatio            =    1.25
	tryLimitFrom         =      40
	tryLimitTo           =      40
Instantiations are computed along the following relations:
node                                  0-chunk         100000   choices
edge        0-chunk            :=     2-slot               1.0 choices (thinned)
edge        0-chunk            [[     2-slot               0   choices
edge        0-chunk            [[     1-slot               1.0 choices
edge        1-slot             =:     0-chunk              0   choices
edge        1-slot             <      2-slot               0   choices
edge      

Observe how two `< >` constraints have been taken together.
They will be tested in one pass.

In [75]:
pr = cProfile.Profile()
pr.enable()
results = S.fetch(limit=10)
pr.disable()
s = io.StringIO()
sortby = SortKey.CUMULATIVE
ps = pstats.Stats(pr, stream=s).sort_stats(sortby)
ps.print_stats()
print(s.getvalue())

         9600336 function calls (9600226 primitive calls) in 1.674 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        2    0.000    0.000    2.695    1.347 /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/IPython/core/interactiveshell.py:3230(run_code)
        2    0.000    0.000    2.695    1.347 {built-in method builtins.exec}
        1    0.000    0.000    2.695    2.695 <ipython-input-75-87271d1c4549>:3(<module>)
        1    0.000    0.000    2.695    2.695 /Users/dirk/github/annotation/text-fabric/tf/search/search.py:151(fetch)
        1    0.000    0.000    2.695    2.695 /Users/dirk/github/annotation/text-fabric/tf/search/searchexe.py:89(fetch)
       11    0.000    0.000    2.695    0.245 /Users/dirk/github/annotation/text-fabric/tf/search/stitch.py:683(deliver)
   121/11    0.000    0.000    2.695    0.245 /Users/dirk/github/annotation/text-fabric/tf/search/stitch.py:690(stitchO

In [76]:
print('\n'.join(str(r) for r in results))

(400001, 1, 4, 2)
(400001, 1, 4, 3)
(400002, 5, 8, 6)
(400002, 5, 8, 7)
(400003, 9, 12, 10)
(400003, 9, 12, 11)
(400004, 13, 16, 14)
(400004, 13, 16, 15)
(400005, 17, 20, 18)
(400005, 17, 20, 19)


In [77]:
S.count(progress=1, limit=50)

  0.00s Counting results per 1 up to 50 ...
   |     0.00s 1
   |     0.00s 2
   |     0.39s 3
   |     0.39s 4
   |     0.77s 5
   |     0.77s 6
   |     1.14s 7
   |     1.14s 8
   |     1.51s 9
   |     1.51s 10
   |     1.88s 11
   |     1.88s 12
   |     2.25s 13
   |     2.25s 14
   |     2.62s 15
   |     2.62s 16
   |     2.99s 17
   |     2.99s 18
   |     3.35s 19
   |     3.35s 20
   |     3.72s 21
   |     3.72s 22
   |     4.09s 23
   |     4.09s 24
   |     4.46s 25
   |     4.46s 26
   |     4.83s 27
   |     4.83s 28
   |     5.20s 29
   |     5.20s 30
   |     5.57s 31
   |     5.57s 32
   |     5.94s 33
   |     5.94s 34
   |     6.30s 35
   |     6.30s 36
   |     6.67s 37
   |     6.67s 38
   |     7.05s 39
   |     7.05s 40
   |     7.41s 41
   |     7.42s 42
   |     7.81s 43
   |     7.81s 44
   |     8.19s 45
   |     8.19s 46
   |     8.55s 47
   |     8.56s 48
   |     8.93s 49
   |     8.93s 50
  8.93s Done: 50 results


## Observations:

For this query, `by_yarn_size` and `small_choice_first` have the same plan and performance.

But `small_choice_multi` has a better performance.

It does only 60% of the function calls that the others do.

`small_choice_multi` has a ratio of primitive calls versus other (slower) function calls of 100%.

For the others it is 80%.

Concretely: 1,600,000 function calls dropped to 121 .

And 6,400,000 primitive calls dropped to 4,800,000 .

But note that the time it takes to get 50 results is worse for the last strategy.

There is a price: the most time consuming bit is this line:

```python
(m for m in yarnT if all(r[i](stitch[x], m) for (i, x) in enumerate(f)))
```

The yarn is the space of nodes where we have to find an instantiation of an object in the template.
The instantiation must satisfy all the constraints.

In this case, the yarn contains basically all slots, the constraints are the `r[i]`, which are `<` or `>` conditions
with respect to two already known nodes: `stitch[x1]` and `stitch[x2]`.

Running the `all()` statement so often, has a cost, although it is still a primitive operation.

# Main test 2

We leave out something of the query.

In [78]:
query = '''
chunk
  =: a:slot
  c:slot
  :=

s:slot

a < s
s < c
'''

It should not make a difference to the outcome that we omit the `a < c` condition, since all chunks have a length greater than 1,
so the first slot of a chunk is always before the last one (and not identical with it).

## Strategy: small choice first

In [84]:
S.study(query, strategy='small_choice_first')

  0.00s Checking search template ...
  0.00s Setting up search space for 4 objects ...
  0.18s Constraining search space with 6 relations ...
  0.53s 	2 edges thinned
  0.53s Setting up retrieval plan with strategy small_choice_first ...
  0.55s Ready to deliver results from 700000 nodes
Iterate over S.fetch() to get the results
See S.showPlan() to interpret the results


In [85]:
S.showPlan(details=True)

Search with 4 objects and 6 relations
Results are instantiations of the following objects:
node  0-chunk                                         100000   choices
node  1-slot                                          100000   choices
node  2-slot                                          100000   choices
node  3-slot                                          400000   choices
Performance parameters:
	yarnRatio            =    1.25
	tryLimitFrom         =      40
	tryLimitTo           =      40
Instantiations are computed along the following relations:
node                                  0-chunk         100000   choices
edge        0-chunk            [[     2-slot               1.0 choices
edge        2-slot             :=     0-chunk              0   choices
edge        0-chunk            [[     1-slot               1.0 choices
edge        1-slot             =:     0-chunk              0   choices
edge        2-slot             >      3-slot          200000.0 choices
edge        3-slot  

In [86]:
pr = cProfile.Profile()
pr.enable()
results = S.fetch(limit=10)
pr.disable()
s = io.StringIO()
sortby = SortKey.CUMULATIVE
ps = pstats.Stats(pr, stream=s).sort_stats(sortby)
ps.print_stats()
print(s.getvalue())

         1600541 function calls (1600381 primitive calls) in 0.371 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        2    0.000    0.000    0.370    0.185 /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/IPython/core/interactiveshell.py:3230(run_code)
        2    0.000    0.000    0.370    0.185 {built-in method builtins.exec}
        1    0.000    0.000    0.370    0.370 <ipython-input-86-87271d1c4549>:3(<module>)
        1    0.000    0.000    0.370    0.370 /Users/dirk/github/annotation/text-fabric/tf/search/search.py:151(fetch)
        1    0.000    0.000    0.370    0.370 /Users/dirk/github/annotation/text-fabric/tf/search/searchexe.py:89(fetch)
       11    0.000    0.000    0.370    0.034 /Users/dirk/github/annotation/text-fabric/tf/search/stitch.py:683(deliver)
   171/11    0.000    0.000    0.370    0.034 /Users/dirk/github/annotation/text-fabric/tf/search/stitch.py:690(stitchO

In [87]:
print('\n'.join(str(r) for r in results))

(400001, 1, 4, 2)
(400001, 1, 4, 3)
(400002, 5, 8, 6)
(400002, 5, 8, 7)
(400003, 9, 12, 10)
(400003, 9, 12, 11)
(400004, 13, 16, 14)
(400004, 13, 16, 15)
(400005, 17, 20, 18)
(400005, 17, 20, 19)


In [88]:
S.count(progress=1, limit=50)

  0.00s Counting results per 1 up to 50 ...
   |     0.00s 1
   |     0.00s 2
   |     0.07s 3
   |     0.07s 4
   |     0.12s 5
   |     0.12s 6
   |     0.18s 7
   |     0.18s 8
   |     0.23s 9
   |     0.23s 10
   |     0.29s 11
   |     0.29s 12
   |     0.34s 13
   |     0.34s 14
   |     0.40s 15
   |     0.40s 16
   |     0.46s 17
   |     0.46s 18
   |     0.51s 19
   |     0.51s 20
   |     0.57s 21
   |     0.57s 22
   |     0.63s 23
   |     0.63s 24
   |     0.68s 25
   |     0.68s 26
   |     0.73s 27
   |     0.73s 28
   |     0.79s 29
   |     0.79s 30
   |     0.85s 31
   |     0.85s 32
   |     0.90s 33
   |     0.90s 34
   |     0.96s 35
   |     0.96s 36
   |     1.02s 37
   |     1.02s 38
   |     1.07s 39
   |     1.07s 40
   |     1.12s 41
   |     1.12s 42
   |     1.17s 43
   |     1.17s 44
   |     1.23s 45
   |     1.23s 46
   |     1.28s 47
   |     1.28s 48
   |     1.33s 49
   |     1.33s 50
  1.33s Done: 50 results


## Strategy: small choice multi

In [89]:
S.study(query, strategy='small_choice_multi')

  0.00s Checking search template ...
  0.00s Setting up search space for 4 objects ...
  0.18s Constraining search space with 6 relations ...
  0.53s 	2 edges thinned
  0.53s Setting up retrieval plan with strategy small_choice_multi ...
  0.55s Ready to deliver results from 700000 nodes
Iterate over S.fetch() to get the results
See S.showPlan() to interpret the results


In [90]:
S.showPlan(details=True)

Search with 4 objects and 5 relations
Results are instantiations of the following objects:
node  0-chunk                                         100000   choices
node  1-slot                                          100000   choices
node  2-slot                                          100000   choices
node  3-slot                                          400000   choices
Performance parameters:
	yarnRatio            =    1.25
	tryLimitFrom         =      40
	tryLimitTo           =      40
Instantiations are computed along the following relations:
node                                  0-chunk         100000   choices
edge        0-chunk            [[     2-slot               1.0 choices
edge        2-slot             :=     0-chunk              0   choices
edge        0-chunk            [[     1-slot               1.0 choices
edge        1-slot             =:     0-chunk              0   choices
edge      2,1-slot            >,<     3-slot           20000.0 choices
  6.39s The results 

Observe how two `< >` constraints have been taken together.
They will be tested in one pass.

In [91]:
pr = cProfile.Profile()
pr.enable()
results = S.fetch(limit=10)
pr.disable()
s = io.StringIO()
sortby = SortKey.CUMULATIVE
ps = pstats.Stats(pr, stream=s).sort_stats(sortby)
ps.print_stats()
print(s.getvalue())

         6400473 function calls (6400378 primitive calls) in 0.901 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        2    0.000    0.000    1.842    0.921 /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/IPython/core/interactiveshell.py:3230(run_code)
        2    0.000    0.000    1.842    0.921 {built-in method builtins.exec}
        1    0.000    0.000    1.842    1.842 <ipython-input-91-87271d1c4549>:3(<module>)
        1    0.000    0.000    1.842    1.842 /Users/dirk/github/annotation/text-fabric/tf/search/search.py:151(fetch)
        1    0.000    0.000    1.842    1.842 /Users/dirk/github/annotation/text-fabric/tf/search/searchexe.py:89(fetch)
       11    0.000    0.000    1.842    0.167 /Users/dirk/github/annotation/text-fabric/tf/search/stitch.py:683(deliver)
   106/11    0.000    0.000    1.842    0.167 /Users/dirk/github/annotation/text-fabric/tf/search/stitch.py:690(stitchO

In [92]:
print('\n'.join(str(r) for r in results))

(400001, 1, 4, 2)
(400001, 1, 4, 3)
(400002, 5, 8, 6)
(400002, 5, 8, 7)
(400003, 9, 12, 10)
(400003, 9, 12, 11)
(400004, 13, 16, 14)
(400004, 13, 16, 15)
(400005, 17, 20, 18)
(400005, 17, 20, 19)


In [93]:
S.count(progress=1, limit=50)

  0.00s Counting results per 1 up to 50 ...
   |     0.00s 1
   |     0.00s 2
   |     0.30s 3
   |     0.30s 4
   |     0.58s 5
   |     0.58s 6
   |     0.88s 7
   |     0.88s 8
   |     1.16s 9
   |     1.16s 10
   |     1.45s 11
   |     1.45s 12
   |     1.83s 13
   |     1.83s 14
   |     2.17s 15
   |     2.17s 16
   |     2.47s 17
   |     2.47s 18
   |     2.81s 19
   |     2.81s 20
   |     3.17s 21
   |     3.17s 22
   |     3.49s 23
   |     3.49s 24
   |     3.78s 25
   |     3.78s 26
   |     4.06s 27
   |     4.06s 28
   |     4.35s 29
   |     4.35s 30
   |     4.63s 31
   |     4.63s 32
   |     4.96s 33
   |     4.96s 34
   |     5.31s 35
   |     5.31s 36
   |     5.63s 37
   |     5.63s 38
   |     5.92s 39
   |     5.92s 40
   |     6.21s 41
   |     6.21s 42
   |     6.49s 43
   |     6.49s 44
   |     6.78s 45
   |     6.79s 46
   |     7.07s 47
   |     7.07s 48
   |     7.35s 49
   |     7.35s 50
  7.35s Done: 50 results


# Main test 3

We add something of the query.

In [97]:
query = '''
chunk
  =: a:slot
  < b:slot
  < d:slot
  c:slot
  :=

s:slot

b < s
s < d
'''

It becomes more difficult to constrain s within the chunk.

This is a heavy query.

## Strategy: small choice first

In [102]:
S.study(query, strategy='small_choice_first')

  0.00s Checking search template ...
  0.00s Setting up search space for 6 objects ...
  0.27s Constraining search space with 10 relations ...
  0.93s 	2 edges thinned
  0.93s Setting up retrieval plan with strategy small_choice_first ...
  0.96s Ready to deliver results from 1500000 nodes
Iterate over S.fetch() to get the results
See S.showPlan() to interpret the results


In [103]:
S.showPlan(details=True)

Search with 6 objects and 10 relations
Results are instantiations of the following objects:
node  0-chunk                                         100000   choices
node  1-slot                                          100000   choices
node  2-slot                                          400000   choices
node  3-slot                                          400000   choices
node  4-slot                                          100000   choices
node  5-slot                                          400000   choices
Performance parameters:
	yarnRatio            =    1.25
	tryLimitFrom         =      40
	tryLimitTo           =      40
Instantiations are computed along the following relations:
node                                  0-chunk         100000   choices
edge        0-chunk            :=     4-slot               1.0 choices (thinned)
edge        4-slot             ]]     0-chunk              0   choices
edge        0-chunk            =:     1-slot               1.0 choices (thinned)

In [104]:
pr = cProfile.Profile()
pr.enable()
results = S.fetch(limit=10)
pr.disable()
s = io.StringIO()
sortby = SortKey.CUMULATIVE
ps = pstats.Stats(pr, stream=s).sort_stats(sortby)
ps.print_stats()
print(s.getvalue())

         55999651 function calls (44799668 primitive calls) in 17.413 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        2    0.000    0.000   17.413    8.706 /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/IPython/core/interactiveshell.py:3230(run_code)
        2    0.000    0.000   17.413    8.706 {built-in method builtins.exec}
        1    0.000    0.000   17.413   17.413 <ipython-input-104-87271d1c4549>:3(<module>)
        1    0.000    0.000   17.413   17.413 /Users/dirk/github/annotation/text-fabric/tf/search/search.py:151(fetch)
        1    0.000    0.000   17.413   17.413 /Users/dirk/github/annotation/text-fabric/tf/search/searchexe.py:89(fetch)
       11    0.000    0.000   17.413    1.583 /Users/dirk/github/annotation/text-fabric/tf/search/stitch.py:683(deliver)
11199994/11   12.022    0.000   17.413    1.583 /Users/dirk/github/annotation/text-fabric/tf/search/stitch.py:690(s

In [105]:
print('\n'.join(str(r) for r in results))

(400001, 1, 2, 4, 4, 3)
(400002, 5, 6, 8, 8, 7)
(400003, 9, 10, 12, 12, 11)
(400004, 13, 14, 16, 16, 15)
(400005, 17, 18, 20, 20, 19)
(400006, 21, 22, 24, 24, 23)
(400007, 25, 26, 28, 28, 27)
(400008, 29, 30, 32, 32, 31)
(400009, 33, 34, 36, 36, 35)
(400010, 37, 38, 40, 40, 39)


## Strategy: small choice multi

In [106]:
S.study(query, strategy='small_choice_multi')

  0.00s Checking search template ...
  0.00s Setting up search space for 6 objects ...
  0.29s Constraining search space with 10 relations ...
  0.95s 	2 edges thinned
  0.95s Setting up retrieval plan with strategy small_choice_multi ...
  0.98s Ready to deliver results from 1500000 nodes
Iterate over S.fetch() to get the results
See S.showPlan() to interpret the results


In [107]:
S.showPlan(details=True)

Search with 6 objects and 9 relations
Results are instantiations of the following objects:
node  0-chunk                                         100000   choices
node  1-slot                                          100000   choices
node  2-slot                                          400000   choices
node  3-slot                                          400000   choices
node  4-slot                                          100000   choices
node  5-slot                                          400000   choices
Performance parameters:
	yarnRatio            =    1.25
	tryLimitFrom         =      40
	tryLimitTo           =      40
Instantiations are computed along the following relations:
node                                  0-chunk         100000   choices
edge        0-chunk            :=     4-slot               1.0 choices (thinned)
edge        4-slot             ]]     0-chunk              0   choices
edge        0-chunk            =:     1-slot               1.0 choices (thinned)


In [108]:
pr = cProfile.Profile()
pr.enable()
results = S.fetch(limit=10)
pr.disable()
s = io.StringIO()
sortby = SortKey.CUMULATIVE
ps = pstats.Stats(pr, stream=s).sort_stats(sortby)
ps.print_stats()
print(s.getvalue())

         67200793 function calls (67200288 primitive calls) in 11.140 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        2    0.000    0.000   17.894    8.947 /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/IPython/core/interactiveshell.py:3230(run_code)
        2    0.000    0.000   17.894    8.947 {built-in method builtins.exec}
        1    0.000    0.000   17.894   17.894 <ipython-input-108-87271d1c4549>:3(<module>)
        1    0.000    0.000   17.894   17.894 /Users/dirk/github/annotation/text-fabric/tf/search/search.py:151(fetch)
        1    0.000    0.000   17.894   17.894 /Users/dirk/github/annotation/text-fabric/tf/search/searchexe.py:89(fetch)
       11    0.000    0.000   17.893    1.627 /Users/dirk/github/annotation/text-fabric/tf/search/stitch.py:683(deliver)
   516/11    0.001    0.000   17.893    1.627 /Users/dirk/github/annotation/text-fabric/tf/search/stitch.py:690(sti

In [109]:
print('\n'.join(str(r) for r in results))

(400001, 1, 2, 4, 4, 3)
(400002, 5, 6, 8, 8, 7)
(400003, 9, 10, 12, 12, 11)
(400004, 13, 14, 16, 16, 15)
(400005, 17, 18, 20, 20, 19)
(400006, 21, 22, 24, 24, 23)
(400007, 25, 26, 28, 28, 27)
(400008, 29, 30, 32, 32, 31)
(400009, 33, 34, 36, 36, 35)
(400010, 37, 38, 40, 40, 39)


# Observation

Here is a query where the amount of time spent in the `stitchOn()` overtakes the time spent in the `all)` call.

So we really have a mixed bag with these strategies.

For now, I turn on the `small_choice_multi` because it makes really long queries a bit more bearable, and does not
make much of a difference for shorter queries.

# Main test 4

A quite different query.

In [110]:
query = '''
chunk
.num. slot
'''

## Strategy: small choice first

In [115]:
S.study(query, strategy='small_choice_first')

  0.00s Checking search template ...
  0.00s Setting up search space for 2 objects ...
  0.08s Constraining search space with 1 relations ...
  0.09s 	0 edges thinned
  0.09s Setting up retrieval plan with strategy small_choice_first ...
  0.12s Ready to deliver results from 500000 nodes
Iterate over S.fetch() to get the results
See S.showPlan() to interpret the results


In [116]:
S.showPlan(details=True)

Search with 2 objects and 1 relations
Results are instantiations of the following objects:
node  0-chunk                                         100000   choices
node  1-slot                                          400000   choices
Performance parameters:
	yarnRatio            =    1.25
	tryLimitFrom         =      40
	tryLimitTo           =      40
Instantiations are computed along the following relations:
node                                  0-chunk         100000   choices
edge        0-chunk          .num.    1-slot               0.0 choices
  3.46s The results are connected to the original search template as follows:
 0     
 1 R0  chunk
 2 R1  .num. slot
 3     


In [117]:
S.count(progress=1, limit=10)

  0.00s Counting results per 1 up to 10 ...
   |     0.00s 1
   |     0.24s 2
   |     0.46s 3
   |     0.68s 4
   |     0.90s 5
   |     1.12s 6
   |     1.34s 7
   |     1.56s 8
   |     1.78s 9
   |     2.01s 10
  2.01s Done: 10 results


## Strategy: small choice multi

In [118]:
S.study(query, strategy='small_choice_multi')

  0.00s Checking search template ...
  0.00s Setting up search space for 2 objects ...
  0.07s Constraining search space with 1 relations ...
  0.10s 	0 edges thinned
  0.10s Setting up retrieval plan with strategy small_choice_multi ...
  0.12s Ready to deliver results from 500000 nodes
Iterate over S.fetch() to get the results
See S.showPlan() to interpret the results


In [119]:
S.showPlan(details=True)

Search with 2 objects and 1 relations
Results are instantiations of the following objects:
node  0-chunk                                         100000   choices
node  1-slot                                          400000   choices
Performance parameters:
	yarnRatio            =    1.25
	tryLimitFrom         =      40
	tryLimitTo           =      40
Instantiations are computed along the following relations:
node                                  0-chunk         100000   choices
edge        0-chunk          .num.    1-slot               0.0 choices
  3.16s The results are connected to the original search template as follows:
 0     
 1 R0  chunk
 2 R1  .num. slot
 3     


In [120]:
S.count(progress=1, limit=10)

  0.00s Counting results per 1 up to 10 ...
   |     0.00s 1
   |     0.24s 2
   |     0.46s 3
   |     0.68s 4
   |     0.90s 5
   |     1.12s 6
   |     1.37s 7
   |     1.65s 8
   |     1.90s 9
   |     2.12s 10
  2.13s Done: 10 results


## Strategy: by yarn size

In [121]:
S.study(query, strategy='by_yarn_size')

  0.00s Checking search template ...
  0.00s Setting up search space for 2 objects ...
  0.08s Constraining search space with 1 relations ...
  0.10s 	0 edges thinned
  0.10s Setting up retrieval plan with strategy by_yarn_size ...
  0.12s Ready to deliver results from 500000 nodes
Iterate over S.fetch() to get the results
See S.showPlan() to interpret the results


In [122]:
S.showPlan(details=True)

Search with 2 objects and 1 relations
Results are instantiations of the following objects:
node  0-chunk                                         100000   choices
node  1-slot                                          400000   choices
Performance parameters:
	yarnRatio            =    1.25
	tryLimitFrom         =      40
	tryLimitTo           =      40
Instantiations are computed along the following relations:
node                                  0-chunk         100000   choices
edge        0-chunk          .num.    1-slot               0.0 choices
  1.80s The results are connected to the original search template as follows:
 0     
 1 R0  chunk
 2 R1  .num. slot
 3     


In [123]:
S.count(progress=1, limit=10)

  0.00s Counting results per 1 up to 10 ...
   |     0.00s 1
   |     0.24s 2
   |     0.46s 3
   |     0.68s 4
   |     0.90s 5
   |     1.12s 6
   |     1.34s 7
   |     1.56s 8
   |     1.84s 9
   |     2.09s 10
  2.10s Done: 10 results


# Main test 5

Yet another feature comparison query.

In [135]:
query = '''
a:chunk
  n:slot
< b:chunk
  m:slot

n .cat. m
'''

## Strategy: small choice first

In [136]:
S.study(query, strategy='small_choice_first')

  0.00s Checking search template ...
  0.00s Setting up search space for 4 objects ...
  0.15s Constraining search space with 4 relations ...
  0.52s 	0 edges thinned
  0.52s Setting up retrieval plan with strategy small_choice_first ...
  0.56s Ready to deliver results from 1000000 nodes
Iterate over S.fetch() to get the results
See S.showPlan() to interpret the results


In [137]:
S.showPlan(details=True)

Search with 4 objects and 4 relations
Results are instantiations of the following objects:
node  0-chunk                                         100000   choices
node  1-slot                                          400000   choices
node  2-chunk                                         100000   choices
node  3-slot                                          400000   choices
Performance parameters:
	yarnRatio            =    1.25
	tryLimitFrom         =      40
	tryLimitTo           =      40
Instantiations are computed along the following relations:
node                                  0-chunk         100000   choices
edge        0-chunk            [[     1-slot               4.0 choices
edge        0-chunk            <      2-chunk          50000.0 choices
edge        2-chunk            [[     3-slot               4.0 choices
edge        3-slot           .cat.    1-slot               0   choices
  3.04s The results are connected to the original search template as follows:
 0     
 1 R0

In [138]:
S.count(progress=1, limit=10)

  0.00s Counting results per 1 up to 10 ...
   |     0.00s 1
   |     0.00s 2
   |     0.00s 3
   |     0.00s 4
   |     0.00s 5
   |     0.00s 6
   |     0.00s 7
   |     0.00s 8
   |     0.01s 9
   |     0.01s 10
  0.01s Done: 10 results


## Strategy: small choice multi

In [129]:
S.study(query, strategy='small_choice_multi')

  0.00s Checking search template ...
  0.04s Setting up search space for 3 objects ...
  0.16s Constraining search space with 4 relations ...
  0.53s 	0 edges thinned
  0.53s Setting up retrieval plan with strategy small_choice_multi ...
  0.56s Ready to deliver results from 900000 nodes
Iterate over S.fetch() to get the results
See S.showPlan() to interpret the results


In [130]:
S.showPlan(details=True)

Search with 3 objects and 4 relations
Results are instantiations of the following objects:
node  0-chunk                                         100000   choices
node  1-slot                                          400000   choices
node  2-slot                                          400000   choices
Performance parameters:
	yarnRatio            =    1.25
	tryLimitFrom         =      40
	tryLimitTo           =      40
Instantiations are computed along the following relations:
node                                  0-chunk         100000   choices
edge        0-chunk            [[     1-slot               4.0 choices
edge        0-chunk            [[     2-slot               4.0 choices
edge        2-slot           .cat.    1-slot               0   choices
edge        1-slot             <      2-slot               0   choices
  2.35s The results are connected to the original search template as follows:
 0     
 1 R0  chunk
 2 R1    a:slot
 3 R2    .cat. b:slot
 4     
 5     a < b
 6  

In [131]:
S.count(progress=1, limit=10)

  0.00s Counting results per 1 up to 10 ...
   |     0.00s 1
   |     0.00s 2
   |     0.00s 3
   |     0.00s 4
   |     0.00s 5
   |     0.00s 6
   |     0.00s 7
   |     0.00s 8
   |     0.01s 9
   |     0.01s 10
  0.01s Done: 10 results


## Strategy: by yarn size

In [132]:
S.study(query, strategy='by_yarn_size')

  0.00s Checking search template ...
  0.00s Setting up search space for 3 objects ...
  0.13s Constraining search space with 4 relations ...
  0.50s 	0 edges thinned
  0.50s Setting up retrieval plan with strategy by_yarn_size ...
  0.54s Ready to deliver results from 900000 nodes
Iterate over S.fetch() to get the results
See S.showPlan() to interpret the results


In [133]:
S.showPlan(details=True)

Search with 3 objects and 4 relations
Results are instantiations of the following objects:
node  0-chunk                                         100000   choices
node  1-slot                                          400000   choices
node  2-slot                                          400000   choices
Performance parameters:
	yarnRatio            =    1.25
	tryLimitFrom         =      40
	tryLimitTo           =      40
Instantiations are computed along the following relations:
node                                  0-chunk         100000   choices
edge        0-chunk            [[     1-slot               4.0 choices
edge        0-chunk            [[     2-slot               4.0 choices
edge        1-slot           .cat.    2-slot               0   choices
edge        1-slot             <      2-slot               0   choices
  3.21s The results are connected to the original search template as follows:
 0     
 1 R0  chunk
 2 R1    a:slot
 3 R2    .cat. b:slot
 4     
 5     a < b
 6  

In [134]:
S.count(progress=1, limit=10)

  0.00s Counting results per 1 up to 10 ...
   |     0.00s 1
   |     0.00s 2
   |     0.00s 3
   |     0.00s 4
   |     0.00s 5
   |     0.00s 6
   |     0.00s 7
   |     0.01s 8
   |     0.01s 9
   |     0.01s 10
  0.01s Done: 10 results
