# Template Lab

Evaluating and constructing TF search templates

In [1]:
import collections, os, sys, random, re
from tf.fabric import Fabric
from tf.extra.bhsa import Bhsa
os.sys.path.append('..')
from experiments2 import Experiment
from semspace import SemSpace

bhsa_data_paths=['~/github/etcbc/bhsa/tf/c',
                 '~/github/verb_semantics/project_code/lingo/heads/tf/c',
                 '~/github/verb_semantics/project_code/sdbh']

TF = Fabric(bhsa_data_paths)
tf_api = TF.load('''
                function lex vs language
                pdp freq_lex gloss domain ls
                mother rela typ sp st code txt
                heads prep_obj
                prs prs_gn prs_nu prs_ps
                sem_domain sem_domain_code
              ''', silent=True)

tf_api.makeAvailableIn(globals())
B = Bhsa(api=tf_api, name='', version='c')

This is Text-Fabric 4.3.4
Api reference : https://dans-labs.github.io/text-fabric/Api/General/
Tutorial      : https://github.com/Dans-labs/text-fabric/blob/master/docs/tutorial.ipynb
Example data  : https://github.com/Dans-labs/text-fabric-data

118 features found and 0 ignored


**Documentation:** <a target="_blank" href="https://etcbc.github.io/bhsa" title="{provenance of this corpus}">BHSA</a> <a target="_blank" href="https://etcbc.github.io/bhsa/features/hebrew/c/0_home.html" title="{CORPUS} feature documentation">Feature docs</a> <a target="_blank" href="https://dans-labs.github.io/text-fabric/Api/Bhsa/" title="BHSA API documentation">BHSA API</a> <a target="_blank" href="https://dans-labs.github.io/text-fabric/Api/General/" title="text-fabric-api">Text-Fabric API 4.3.4</a> <a target="_blank" href="https://dans-labs.github.io/text-fabric/Api/General/#search-templates" title="Search Templates Introduction and Reference">Search Reference</a>


This notebook online:
<a target="_blank" href="http://nbviewer.jupyter.org/github/verb_semantics/project_code/blob/master/datareview/.ipynb">NBViewer</a>
<a target="_blank" href="https://github.com/verb_semantics/project_code/blob/master/datareview/.ipynb">GitHub</a>


In [184]:
class validateFrame:
    '''
    This class prepares frame validation data
    and then filters results based on the prepared
    data.
    '''
    
    def __init__(self, mother_templates=tuple(), 
                       daughter_templates=tuple(), 
                       mother_ri = 0,
                       daughter_ri = 3,
                       exp_name = ''):
    
        print(f'Preparing frame validation data for {exp_name}...')

        self.good_mothers = set()
        self.good_daughters = collections.defaultdict(set)
        self.daughter_ri = daughter_ri
        self.mother_ri = mother_ri

        print(f'\tpreparing good mother set...')
        for mom in mother_templates:
            results = set(S.search(mom))
            self.good_mothers |= set(r[mother_ri] for r in results) 

        print(f'\tpreparing good daughter set...')
        for daught in daughter_templates:
            results = set(S.search(daught))
            for r in results:
                rela = F.rela.v(r[daughter_ri])
                self.good_daughters[rela].add(r[daughter_ri])

        print(f'\t√ Frame validation data prep complete.')
    
    def mothers(self, results):
        '''
        Checks both a mother and her daughters
        for validity.
        '''
        check_relas = set(self.good_daughters.keys())
        validated_results = []
        for r in results:
            mother = r[self.mother_ri]
            check_mother_daughters = all([d in self.good_daughters[F.rela.v(d)] for d in E.mother.t(mother)
                                              if F.rela.v(d) in check_relas])
            if mother in self.good_mothers and check_mother_daughters:
                validated_results.append(r)
        return validated_results
                
    def daughters(self, results):
        '''
        Checks daughters for validity.
        '''
        check_relas = set(self.good_daughters.keys())
        validated_results = []
        for r in results:
            if all([d in self.good_daughters[F.rela.v(d)] for d in E.mother.t(r[0]) # NB: Assume mother is i=0
                        if F.rela.v(d) in check_relas]):
                validated_results.append(r)
        return validated_results

In [196]:
# standard predicate target template

pred_target = '''

c1:clause
    p1:phrase

    /with/
    clause typ#Ptcp
        p:phrase function={pred_funct}
            -heads> word pdp=verb language=Hebrew
        p = p1
    /or/
    clause typ=Ptcp
        p:phrase function={ptcp_funct}
            -heads> word pdp=verb language=Hebrew
        p = p1
    /-/

        target:word pdp=verb
    
{basis}

lex freq_lex>9
   lexword:word 
   lexword = target
'''

all_preds = 'Pred|PreO|PreS|PtcO' # all predicate phrase functions
all_ptcp = 'PreC|PtcO'

vf_allarg_pa_speech = pred_target.format(basis='''

c3:clause
c2:clause
/without/
    phrase function=Rela
/-/
    ca1:clause_atom
    
ca2:clause_atom code=999

c1 = c2
ca1 <mother- ca2
ca1 <: ca2
ca1 [[ p1
c3 [[ ca2
''', pred_funct=all_preds, ptcp_funct=all_ptcp)

test = B.search(vf_allarg_pa_speech)

4681 results


In [207]:
for r in test:
    
    test_mom = r[0]

    relas = set(F.rela.v(d) for d in E.mother.t(tes))

Exception: (431732, 664180, 20553, 431733, 431732, 519975, 519976, 1437607, 20553)

In [209]:
E.mother.t(431732)

(431737,)

In [208]:
B.show([(431732, 664180, 20553, 431733, 431732, 519975, 519976, 1437607, 20553)], withNodes=True)


##### Passage 1



##### Passage 2


## Tests

In [135]:
B.prettySetup(features={'sem_domain', 'sem_domain_code'})

B.pretty(L.u(33557, 'clause')[0], withNodes=True)

In [134]:
re.findall('1\.00[1-3][0-9]*|2\.[0-9]*', '1.004003')

[]

In [140]:
verb_sem_domain = set()

count = 0

for verb in F.pdp.s('verb'):
    
    if F.sem_domain_code.v(verb) in {'1.004003', '1.004005'}:
        
        count += 1

In [171]:
T.text(E.heads.f(688144))

'בַּעֲדֹו֙ בְעַ֣ד בְעַ֖ד '

In [178]:
B.show([(491812,
   839177,
   316054,
   839179,
   316056,
   1442725,
   316054)], withNodes=True)


##### Passage 1



##### Passage 2
