# Template Lab

Evaluating and constructing TF search templates

In [151]:
import collections, os, sys, random, re
from tf.fabric import Fabric
from tf.extra.bhsa import Bhsa
os.sys.path.append('..')
from experiments2 import Experiment
from semspace import SemSpace

# Feature directories handed to Text-Fabric together: the BHSA core data plus
# two project-specific feature sets (heads and sdbh).
bhsa_data_paths=['~/github/etcbc/bhsa/tf/c',
                 '~/github/verb_semantics/project_code/lingo/heads/tf/c',
                 '~/github/verb_semantics/project_code/sdbh']

TF = Fabric(bhsa_data_paths)
# Load only the features named below; silent=True is passed to suppress
# loading messages.
tf_api = TF.load('''
                function lex vs language
                pdp freq_lex gloss domain ls
                mother rela typ sp st code txt
                heads prep_obj instruction
                prs prs_gn prs_nu prs_ps
                sem_domain sem_domain_code
              ''', silent=True)

# Later cells use F, E and S without defining them, so those names are
# presumably injected into the notebook namespace by this call.
tf_api.makeAvailableIn(globals())
# NOTE(review): name='' appears to be what produces the nameless "/.ipynb"
# links in this cell's output -- confirm and pass the notebook name if so.
B = Bhsa(api=tf_api, name='', version='c')

This is Text-Fabric 4.3.5
Api reference : https://dans-labs.github.io/text-fabric/Api/General/
Tutorial      : https://github.com/Dans-labs/text-fabric/blob/master/docs/tutorial.ipynb
Example data  : https://github.com/Dans-labs/text-fabric-data

118 features found and 0 ignored
   |     0.23s T instruction          from /Users/cody/github/etcbc/bhsa/tf/c


**Documentation:** <a target="_blank" href="https://etcbc.github.io/bhsa" title="{provenance of this corpus}">BHSA</a> <a target="_blank" href="https://etcbc.github.io/bhsa/features/hebrew/c/0_home.html" title="{CORPUS} feature documentation">Feature docs</a> <a target="_blank" href="https://dans-labs.github.io/text-fabric/Api/Bhsa/" title="BHSA API documentation">BHSA API</a> <a target="_blank" href="https://dans-labs.github.io/text-fabric/Api/General/" title="text-fabric-api">Text-Fabric API 4.3.5</a> <a target="_blank" href="https://dans-labs.github.io/text-fabric/Api/General/#search-templates" title="Search Templates Introduction and Reference">Search Reference</a>


This notebook online:
<a target="_blank" href="http://nbviewer.jupyter.org/github/verb_semantics/project_code/blob/master/datareview/.ipynb">NBViewer</a>
<a target="_blank" href="https://github.com/verb_semantics/project_code/blob/master/datareview/.ipynb">GitHub</a>


In [40]:
class validateFrame:
    '''
    Prepares frame validation data and filters search
    results against it.

    On construction the mother and daughter templates are run
    through `S.search`. Nodes found at `mother_ri` are stored in
    `good_mothers`; nodes found at `daughter_ri` are stored in
    `good_daughters`, keyed by their `rela` feature value.

    A daughter is only checked when its `rela` value is a key of
    `good_daughters`; daughters with any other `rela` are ignored.
    '''

    def __init__(self, mother_templates=tuple(),
                       daughter_templates=tuple(),
                       mother_ri = 0,
                       daughter_ri = 3,
                       exp_name = ''):
        '''
        mother_templates: search templates whose results contain
            an acceptable mother node at index `mother_ri`.
        daughter_templates: search templates whose results contain
            an acceptable daughter node at index `daughter_ri`.
        mother_ri: index of the mother node, both in the mother
            template results and in results passed to `mothers`.
        daughter_ri: index of the daughter node in the daughter
            template results.
        exp_name: label used only in the progress message.
        '''

        print(f'Preparing frame validation data for {exp_name}...')

        self.good_mothers = set()
        self.good_daughters = collections.defaultdict(set)
        self.daughter_ri = daughter_ri
        self.mother_ri = mother_ri

        print(f'\tpreparing good mother set...')
        for mom in mother_templates:
            # adding to a set already de-duplicates the results
            self.good_mothers.update(r[mother_ri] for r in S.search(mom))

        print(f'\tpreparing good daughter set...')
        for daught in daughter_templates:
            for r in S.search(daught):
                daughter = r[daughter_ri]
                self.good_daughters[F.rela.v(daughter)].add(daughter)

        print(f'\t√ Frame validation data prep complete.')

    def _daughters_ok(self, mother):
        '''
        Returns True when every daughter of `mother` (nodes from
        `E.mother.t`) whose rela is tracked in `good_daughters`
        is itself a good daughter.
        '''
        for daughter in E.mother.t(mother):
            rela = F.rela.v(daughter)
            # `in` on the defaultdict does not create a new key,
            # so untracked relas are skipped without side effects
            if rela in self.good_daughters and daughter not in self.good_daughters[rela]:
                return False
        return True

    def mothers(self, results):
        '''
        Checks both a mother and her daughters
        for validity. Returns the list of results, in their
        original order, whose node at `mother_ri` passes.
        '''
        validated_results = []
        for r in results:
            mother = r[self.mother_ri]
            # cheap membership test first; daughters only if needed
            if mother in self.good_mothers and self._daughters_ok(mother):
                validated_results.append(r)
        return validated_results

    def daughters(self, results):
        '''
        Checks daughters for validity. Returns the list of
        results, in their original order, that pass.
        '''
        # NB: Assume mother is i=0 (mother_ri is deliberately not used here)
        return [r for r in results if self._daughters_ok(r[0])]
    

'''
The following search templates are specialized for
selecting carefully defined clause relations. These
templates have been crafted to select elements from the 
clauses which can easily be lexicalized as basis strings.
It excludes a small number of clause relations that cannot
easily be lexicalized, such as verbless clauses without conjunction
elements (i.e. כאשר)
'''
    
# Clause-relation template: clause c2 contains a conjunction phrase p1 (typ=CP)
# and a phrase p2 that the /with/ ... /or/ ... /-/ quantifier restricts to the
# predicate phrase of a kind=VC clause with rela={relas}: function
# Pred|PreS|PreO when typ#Ptcp, PreC|PtcO when typ=Ptcp. The verb below p2 is
# named `basis`.
# `{relas}` and `{reqs}` are placeholders, presumably filled with str.format as
# the other templates in this notebook are.
# `c1` is not defined in this template; presumably it comes from the template
# this one is inserted into (e.g. pred_target's c1).
# NOTE(review): the `kind` feature is not in the TF.load list visible in this
# notebook -- confirm it is available.
# NOTE(review): "#basis @ 6" presumably records the result-tuple index of the
# basis word -- confirm.
clR_vc_CP = '''

#basis @ 6

c2:clause
    p1:phrase typ=CP
    p2:phrase
    /with/
    clause kind=VC rela={relas} typ#Ptcp
        p3:phrase function=Pred|PreS|PreO
        p3 = p2
    /or/
    clause kind=VC rela={relas} typ=Ptcp
        p3:phrase function=PreC|PtcO
        p3 = p2
    /-/

        basis:word pdp=verb {reqs}

c1 <mother- c2
c2 [[ p2
p1 < p2
'''

# Clause-relation template: clause c2 has no conjunction phrase (the /without/
# block rejects typ=CP) and its phrase p2 is restricted, as in clR_vc_CP, to the
# predicate phrase of a kind=VC clause with rela={relas}. Inside p2 the template
# asks for a preposition (pdp=prep) related by `<` to a verb carrying {reqs}.
# `{relas}` and `{reqs}` are placeholders; `c1` is not defined here and
# presumably comes from the template this one is inserted into.
# NOTE(review): the header says "#basis @ 6" but no atom in this template is
# named `basis` -- confirm which word is meant.
clR_vc_prep = '''

#basis @ 6

c2:clause
/without/
    phrase typ=CP
/-/
    p2:phrase
    /with/
    clause kind=VC rela={relas} typ#Ptcp
        p:phrase function=Pred|PreS|PreO
        p = p2
    /or/
    clause kind=VC rela={relas} typ=Ptcp
        p:phrase function=PreC|PtcO
        p = p2
    /-/
    
        word pdp=prep
        < word pdp=verb {reqs} 

c1 <mother- c2
'''

# Clause-relation template for a bare verb: clause c2 must contain neither a
# conjunction phrase (typ=CP) nor a word with pdp=prin|inrg. Its phrase p2 is
# restricted to the predicate phrase of a kind=VC clause with rela={relas}, and
# that predicate phrase must not contain a preposition (inner /without/ blocks).
# The verb below p2 is named `basis`.
# `{relas}` and `{reqs}` are placeholders; `c1` is not defined here and
# presumably comes from the template this one is inserted into.
clR_vc_verb = '''

#basis @ 5

c2:clause
/without/
    phrase typ=CP
/-/
/without/
    word pdp=prin|inrg
/-/

    p2:phrase
    
    /with/
    clause kind=VC rela={relas} typ#Ptcp
        p:phrase function=Pred|PreS|PreO
        /without/
            word pdp=prep
        /-/
        p = p2
    /or/
    clause kind=VC rela={relas} typ=Ptcp
        p:phrase function=PreC|PtcO
        /without/
            word pdp=prep
        /-/
        p = p2
    /-/
    
        basis:word pdp=verb {reqs}

c1 <mother- c2
'''

# Clause-relation template for a kind=NC clause with rela={relas} that holds a
# conjunction phrase (typ=CP) related by `<` to a PreC phrase; the PreC head
# word (via the `heads` edge) may not have pdp prep|prps|prde|prin|inrg and
# must satisfy {reqs}.
# `{relas}` and `{reqs}` are placeholders; `c1` is not defined here and
# presumably comes from the template this one is inserted into.
# NOTE(review): the `kind` feature is not in the TF.load list visible in this
# notebook -- confirm it is available.
clR_nc_CP = '''
c2:clause kind=NC rela={relas}
    phrase typ=CP
    < phrase function=PreC
        -heads> word pdp#prep|prps|prde|prin|inrg {reqs}

c1 <mother- c2
'''

# Clause-relation template for a kind=NC clause with rela={relas} that has no
# conjunction phrase (typ=CP is rejected) and a PreC phrase of typ=AdvP whose
# head word may not have pdp prep|prps|prde|prin|inrg and must satisfy {reqs}.
# The in-template note limits its use to adj/cmpl relations.
# `{relas}` and `{reqs}` are placeholders; `c1` is not defined here and
# presumably comes from the template this one is inserted into.
clR_nc_PreC_adv = '''
#only for use with adj/cmpl relations 

c2:clause kind=NC rela={relas}
/without/
    phrase typ=CP
/-/
    phrase function=PreC typ=AdvP
        -heads> word pdp#prep|prps|prde|prin|inrg {reqs}

c1 <mother- c2
'''

# Clause-relation template for a kind=NC clause with rela={relas} that has no
# conjunction phrase (typ=CP is rejected) and a PreC phrase of typ=PP. It asks
# for a preposition via the `heads` edge and, via the `prep_obj` edge, a word
# that may not have pdp prep|prps|prde|prin|inrg and must satisfy {reqs}.
# The in-template note limits its use to adj/cmpl functions.
# `{relas}` and `{reqs}` are placeholders; `c1` is not defined here and
# presumably comes from the template this one is inserted into.
clR_nc_PreC_prep = '''
#only for use with adj/cmpl functions 

c2:clause kind=NC rela={relas}
/without/
    phrase typ=CP
/-/
    phrase function=PreC typ=PP
        -heads> word pdp=prep
        -prep_obj> word pdp#prep|prps|prde|prin|inrg {reqs}

c1 <mother- c2
'''

In [184]:
# standard predicate target template

# Base template for a predicate verb ("target"): clause c1 holds phrase p1,
# which the quantifier restricts to a phrase with function={pred_funct} in
# clauses with typ#Ptcp, or function={ptcp_funct} in clauses with typ=Ptcp,
# whose head is a Hebrew verb. `target` is a verb inside p1.
# `{basis}` is the slot where an additional template fragment is inserted.
# The closing `lex freq_lex>9` block ties `target` to a lexeme node whose
# freq_lex exceeds 9.
# The cells below read index 3 of a result as the first atom of the inserted
# basis, so results presumably start (c1, p1, target, ...) -- confirm.
pred_target = '''

c1:clause
    p1:phrase

    /with/
    clause typ#Ptcp
        p:phrase function={pred_funct}
            -heads> word pdp=verb language=Hebrew
        p = p1
    /or/
    clause typ=Ptcp
        p:phrase function={ptcp_funct}
            -heads> word pdp=verb language=Hebrew
        p = p1
    /-/

        target:word pdp=verb
    
{basis}

lex freq_lex>9
   lexword:word 
   lexword = target
'''

# Values substituted for {pred_funct} and {ptcp_funct} respectively.
# NOTE(review): PtcO appears in both lists, whereas the clR_* templates use
# Pred|PreS|PreO for non-participle clauses -- confirm this is intended.
all_preds = 'Pred|PreO|PreS|PtcO' # all predicate phrase functions
all_ptcp = 'PreC|PtcO'

# Conditions on clause c2 (identified with c1 by the final `c1 = c2`):
#   * no phrase with function={relas} may have a typ other than
#     NP|PrNP|AdvP|PP (first /without/ block);
#   * {clause_reqs} inserts further clause-level conditions;
#   * every non-PP phrase with function={relas} must have heads that are not
#     pdp prep|prps|prde|prin|inrg and that satisfy {word_reqs};
#   * every PP phrase with function={relas} must have, for each preposition
#     head, a prep_obj word meeting the same pdp and {word_reqs} conditions.
# `c1` is not defined here; it is presumably pred_target's c1, since this
# fragment looks designed for the {basis} slot.
vf_clause_conditions = '''

c2:clause
/without/
    phrase function={relas} typ#NP|PrNP|AdvP|PP
/-/
{clause_reqs}

/where/
    phrase function={relas} typ#PP
/have/
    /where/
        -heads> w1:word
    /have/
        w2:word pdp#prep|prps|prde|prin|inrg {word_reqs}
        w1 = w2
    /-/
/-/

/where/
    phrase function={relas} typ=PP
/have/
    /where/
        -heads> word pdp=prep
    /have/
        -prep_obj> word pdp#prep|prps|prde|prin|inrg {word_reqs}
    /-/
/-/

c1 = c2
'''

# The conditions instantiated for six phrase functions, with no extra word
# requirements and with clauses containing a function=Rela phrase excluded.
vf_all_arg_conditions = vf_clause_conditions.format(relas='Objc|Cmpl|Adju|Time|Loca|PrAd', 
                                                    word_reqs='',
                                                    clause_reqs='/without/\n    phrase function=Rela\n/-/')

# pred_target specialised for speech verbs: the target must have lex >MR[ or
# DBR[ (`verbumdicendi = target`), and its clause c2 (= c1) must not contain a
# function=Rela phrase. Clause atom ca1 lies in c2 and embeds the predicate
# phrase p1; clause atom ca2 (code=999, instruction=.q) is embedded in clause
# c3, is linked to ca1 by the `mother` edge and is related to it by `<:`.
vi_o_pa_speech = pred_target.format(basis='''
c3:clause
c2:clause
/without/
    phrase function=Rela
/-/
    ca1:clause_atom
    
ca2:clause_atom code=999 instruction=.q

verbumdicendi:word lex=>MR[|DBR[
verbumdicendi = target

c1 = c2
ca1 <mother- ca2
ca1 <: ca2
ca1 [[ p1
c3 [[ ca2
''', pred_funct=all_preds, ptcp_funct=all_ptcp)

# Run the query; `test` is reassigned by a later cell in this notebook.
test = B.search(vi_o_pa_speech)

4268 results


In [187]:
# Display the single hard-coded node 462775, wrapped as a one-element result
# tuple; withNodes=True presumably adds node numbers to the display.
B.show([(462775,)], withNodes=True)


##### Passage 1


In [191]:
# Look up the pdp feature value of the hard-coded node 185580.
F.pdp.v(185580)

'adjv'

In [188]:
# Search for words inside NP phrases, filtered on the `sp` feature.
# NOTE(review): `sp=` has no value, so this template looks unfinished, and the
# output recorded under this cell is a bare string rather than a result count.
# Confirm the intended `sp` condition before relying on `wrong_heads`.
wrong_heads = B.search('''

phrase typ=NP
    word sp=

''')

'מְלָאכָה֙ '

## Developing Animacy Codes

DO NOT DELETE

In [28]:
# Regex patterns over semantic-domain codes, grouped by animacy and
# ordered in terms of selection preferences: select animate first, etc.
# Raw strings keep `\.` and `\|` as regex escapes without relying on Python
# passing unknown string escapes through (which triggers a warning).
# Character classes list digits only: a comma inside [...] would itself be
# matched as a literal character.
code_priorities = (
    (r'(1\.001001[0-9]*)',  # ANIMATE
     r'(1\.00300100[36])',
     r'(1\.00300101[03])',
     r'(1\.003001005$)|(1\.003001005)\|',  # names of groups (!)
    ),

    (r'(1\.00100[2-6][0-9]*)',  # INANIMATE
     r'(1\.001$)',
     r'(1\.001)\|',
    ),
)

# Every code considered relevant to animacy; interpolated into search templates.
animacy_codes = r'1\.001[0-9]*|1\.003001[0-9]*|2\.[0-9]*'
# Codes counted as animate; any other matched code is counted as inanimate.
animate = r'1\.001001[0-9]*|1\.00300100[356]|1\.003001010'
# Tally of 'animate' / 'inanimate' hits, filled by the loop further down.
animacy_count = collections.Counter()


# Predicate targets accompanied by a word whose semantic-domain code matches
# one of the animacy-relevant codes.
test = B.search(
    pred_target.format(
        pred_funct=all_preds,
        ptcp_funct=all_ptcp,
        basis=f'''

    word sem_domain_code~{animacy_codes}

''',
    )
)

# Index 3 of each result is the coded word inserted through the basis slot.
for result in test:
    word_code = F.sem_domain_code.v(result[3])
    label = 'animate' if re.search(animate, word_code) else 'inanimate'
    animacy_count[label] += 1

animacy_count

73468 results


Counter({'animate': 39518, 'inanimate': 33950})

### Are Verbs ever included in animacy?

In [38]:
# All verbs (pdp=verb) whose semantic-domain code matches one of the
# animacy-relevant codes in `animacy_codes`.
verb = B.search(f'''

word sem_domain_code~{animacy_codes} pdp=verb

''')

2543 results


In [33]:
#B.show(verb[:5])

These results mean I must exclude verbs from animacy experiments.

### Marking Frame Objects in the HB

In [20]:
# which frame referents are included?

# Every word whose semantic-domain code contains a "2.<digits>" entry.
# Raw string: `\.` is a regex escape, not a Python string escape.
test2 = B.search(r'''

word sem_domain_code~2\.[0-9]*

''')

# A "2.<digits>" entry that either ends the value or is followed by the "|"
# separator. Compiled once instead of on every loop iteration.
frame_code_re = re.compile(r'(2\.[0-9]*$)|(2\.[0-9]*)\|')

testcodes = set()

for r in test2:
    code = F.sem_domain_code.v(r[0])
    # First matching entry; exactly one of the two groups takes part in a match.
    first_match = frame_code_re.search(code)
    code_select = first_match.group(1) or first_match.group(2)
    # sem_domain is split on "|" in parallel with sem_domain_code, so the
    # position of the code is used to pick its domain name.
    codei = code.split('|').index(code_select)
    domain = F.sem_domain.v(r[0]).split('|')[codei]
    testcodes.add((domain, code_select))

print(f'{len(testcodes)} test codes selected.')

23306 results
30 test codes selected.


In [21]:
testcodes

{('Clothing', '2.032'),
 ('Conflict', '2.040'),
 ('Control', '2.036'),
 ('Covenant', '2.042'),
 ('Festival', '2.059'),
 ('Fire', '2.060'),
 ('Food', '2.062'),
 ('Joy and Grief', '2.085'),
 ('Land', '2.090'),
 ('Light and Darkness', '2.093'),
 ('Liquids', '2.094'),
 ('Plan', '2.114'),
 ('Plant', '2.115'),
 ('Possession', '2.118'),
 ('Praise', '2.120'),
 ('Quantity', '2.128'),
 ('Respect', '2.132'),
 ('Sacrifice', '2.137'),
 ('Sex', '2.142'),
 ('Shape', '2.143'),
 ('Sin', '2.146'),
 ('Size', '2.147'),
 ('Speed', '2.151'),
 ('Strength', '2.155'),
 ('Time', '2.164'),
 ('Universe', '2.167'),
 ('Vehicle', '2.169'),
 ('Water', '2.171'),
 ('Weight', '2.173'),
 ('Well', '2.177')}

It is fair to mark all of these as "inanimate".