# Exploring Trees from the ETCBC Lingo Module

Explore trees from the lingo module and examine the level of subphrase embedding that is reflected in the trees. 

In [12]:
import sys, os, collections, re
from tf.fabric import Fabric

# Local checkouts that hold the custom helper modules.
bhsa_visual = '~/github/bhsa/programs'  # for shebanq-like visualizations
tree_utils = '~/github/etcbc/lingo'     # tree methods for examining trees

# Register both directories, with "~" expanded, so the imports below resolve.
sys.path.extend(
    os.path.expanduser(repo_dir) for repo_dir in (bhsa_visual, tree_utils)
)

from bhsa import Bhsa 
from trees.utils import structure, layout

In [43]:
# Point Text-Fabric at the local etcbc checkout and combine the BHSA module
# with the lingo trees module (both version 2017).
TF = Fabric(
    locations='~/github/etcbc',
    modules=['bhsa/tf/2017', 'lingo/trees/tf/2017'],
)

# Load the listed features: section labels, word/phrase features, the tree
# strings, and two word representations.
api = TF.load('''
                book chapter verse
                sp pdp function tree
                lex g_cons_utf8
              ''')

# Expose the API in the notebook's globals; later cells use T, L and F directly.
api.makeAvailableIn(globals())

# instantiate visualizer
B = Bhsa(api, TF, version='2017')

This is Text-Fabric 3.2.5
Api reference : https://github.com/Dans-labs/text-fabric/wiki/Api
Tutorial      : https://github.com/Dans-labs/text-fabric/blob/master/docs/tutorial.ipynb
Example data  : https://github.com/Dans-labs/text-fabric-data

116 features found and 0 ignored
  0.00s loading features ...
   |     0.01s B book                 from /Users/cody/github/etcbc/bhsa/tf/2017
   |     0.01s B chapter              from /Users/cody/github/etcbc/bhsa/tf/2017
   |     0.01s B verse                from /Users/cody/github/etcbc/bhsa/tf/2017
   |     0.18s B g_cons_utf8          from /Users/cody/github/etcbc/bhsa/tf/2017
   |     0.14s B sp                   from /Users/cody/github/etcbc/bhsa/tf/2017
   |     0.12s B pdp                  from /Users/cody/github/etcbc/bhsa/tf/2017
   |     0.06s B function             from /Users/cody/github/etcbc/bhsa/tf/2017
   |     0.05s B tree                 from /Users/cody/github/etcbc/lingo/trees/tf/2017
   |     0.13s B lex                  f

# Tree Set-Up

Loading tree data for the first time...

In [17]:
# Node for the section ('Genesis', 1, 1).
gen_1 = T.nodeFromSection(('Genesis', 1, 1))
# First 'sentence' node that L.d returns for that section node.
gen_1_sent = L.d(gen_1, 'sentence')[0]
# 'word' nodes that L.d returns for that sentence; the first one is passed to
# fillWords later as the base that the tree's leaf numbers are added to.
gen_1_words = L.d(gen_1_sent, 'word')

# Display the value of the `tree` feature for the sentence (a bracketed string).
F.tree.v(gen_1_sent)

'(S(C(PP(pp 0)(n 1))(VP(vb 2))(NP(n 3))(PP(U(pp 4)(dt 5)(n 6))(cj 7)(U(pp 8)(dt 9)(n 10)))))'

In [25]:
# Turn the bracketed tree string into the nested-list form shown in the output.
structure(F.tree.v(gen_1_sent))

['S',
 ['C',
  ['PP', [('pp', 0)], [('n', 1)]],
  ['VP', [('vb', 2)]],
  ['NP', [('n', 3)]],
  ['PP',
   ['U', [('pp', 4)], [('dt', 5)], [('n', 6)]],
   [('cj', 7)],
   ['U', [('pp', 8)], [('dt', 9)], [('n', 10)]]]]]

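Getting Hebrew into the number slots: each number in the tree string is an offset from the sentence's first word node, so it can be replaced by a feature value of that word (for example, its lexeme).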

In [18]:
# code via Dirk Roorda
# https://github.com/ETCBC/lingo/blob/master/trees/example.ipynb

# Matches every run of decimal digits in a tree string.
numPattern = re.compile('[0-9]+')

def fillWords(tree, start, wordRep):
    """Replace each number in a tree string with a word representation.

    Every run of digits in `tree` is read as an integer, `start` is added to
    it, and the digits are replaced by `wordRep` applied to that sum.

    Parameters:
        tree: string containing the numbers to replace.
        start: value added to each number before it is passed to `wordRep`.
        wordRep: callable that receives the sum and returns the replacement.

    Returns:
        The tree string with all digit runs substituted.
    """
    return numPattern.sub(
        lambda found: wordRep(int(found.group(0)) + start),
        tree,
    )

In [21]:
# Replace each leaf number with the `lex` value of the word found by adding
# that number to the sentence's first word node.
fillWords(F.tree.v(gen_1_sent), gen_1_words[0], F.lex.v)

'(S(C(PP(pp B)(n R>CJT/))(VP(vb BR>[))(NP(n >LHJM/))(PP(U(pp >T)(dt H)(n CMJM/))(cj W)(U(pp >T)(dt H)(n >RY/)))))'

In [27]:
# Same call as the previous cell, with the filled-in tree string kept under a name.
completed_sent = fillWords(F.tree.v(gen_1_sent), gen_1_words[0], F.lex.v)

In [47]:
# First 'sentence' node that L.u returns for node 829049.
# NOTE(review): 829049 is a hard-coded example node; say what it is and why it was picked.
test = L.u(829049, 'sentence')[0]
# Tree string stored on that sentence.
test_tree = F.tree.v(test)
# First 'word' node that L.d returns for the sentence.
first_w = L.d(test, 'word')[0]

# Render the sentence with the Bhsa visualizer.
B.show([(test,)])


## Result 0
(**sentence** `1216242`)


In [48]:
# Print the indented layout of the example tree; first_w and F.g_cons_utf8.v are
# passed so the leaves show Hebrew consonantal text (see the output below).
print(layout(structure(test_tree), first_w, F.g_cons_utf8.v))

  S
    C
      CP
        cj ו
      VP
        vb הקמנו
      PP
        pp עליו
      NP
        U
          U
            n שׁבעה
          U
            vb רעים
        cj ו
        U
          U
            n שׁמנה
          U
            U
              n נסיכי
            U
              n אדם
