TODO — things to fix or look at:

* file/directory locations: annotate, standoff
* file_list
* mentions
* start_index and end_index of mentions

In [1]:
__author__ = 'erwich/sikkel/roorda'

In [2]:
import getopt
import os, sys
from sys import argv, exit, stderr

import collections 
import re
from shutil import rmtree
from glob import glob
from pprint import pprint

from tf.app import use

In [3]:
TAB = '\t'      # field separator in the .ann lines written further down
NL = '\n'       # line terminator in the .ann lines written further down
VERSION = 'c'   # data version: passed to use() and embedded in REFERENCE

In [4]:
def error(*args, **kwargs):
    '''
    Report a fatal problem and stop.

    The positional arguments are printed to stderr (extra keyword arguments
    are forwarded to print), after which the process exits with status 1.
    '''
    print(*args, **kwargs, file=stderr)
    raise SystemExit(1)

In [5]:
def Usage():
    '''Show the command-line synopsis on stderr and exit with status 1.'''
    print('usage: [your python version] createcoref book_name first_chapter [last_chapter]\n',
          end='', file=stderr)
    raise SystemExit(1)

In [6]:
# Location of the reference-system features, relative to a GitHub clone;
# the data version is part of the path.
REFERENCE = f'bh-reference-system/tf/{VERSION}' # Check my GitHub to download extra pgn features
# Base directory of the brat data; '~' is expanded to the user's home.
OUTPUT_BASE = os.path.expanduser('~/Sites/brat/data/coref')
# Sub-directory names (plain strings: there is nothing to interpolate).
ANNOTATE = 'annotate'
STANDOFF = 'standoff'

In [7]:
# Load the BHSA corpus in the configured data version together with three
# extra data modules (two of them taken from local clones, see ':clone').
# hoist=globals() is what makes the names F, L, T, E and TF, which the rest
# of this notebook uses without defining them, available as globals.
A = use('bhsa', 
        version = VERSION,
        hoist=globals(),
        mod=('cmerwich/participant-analysis/test/tf:clone,'
             'cmerwich/bh-reference-system/tf,'
             'etcbc/lingo/heads/tf:clone'),
       )

	connecting to online GitHub repo annotation/app-bhsa ... connected
Using TF-app in /Users/Christiaan/text-fabric-data/annotation/app-bhsa/code:
	rv1.2=#5fdf1778d51d938bfe80b37b415e36618e50190c (latest release)
	connecting to online GitHub repo etcbc/bhsa ... connected
Using data in /Users/Christiaan/text-fabric-data/etcbc/bhsa/tf/c:
	rv1.6=#bac4a9f5a2bbdede96ba6caea45e762fe88f88c5 (latest release)
	connecting to online GitHub repo etcbc/phono ... connected
Using data in /Users/Christiaan/text-fabric-data/etcbc/phono/tf/c:
	r1.2=#1ac68e976ee4a7f23eb6bb4c6f401a033d0ec169 (latest release)
	connecting to online GitHub repo etcbc/parallels ... connected
Using data in /Users/Christiaan/text-fabric-data/etcbc/parallels/tf/c:
	r1.2=#395dfe2cb69c261862fab9f0289e594a52121d5c (latest release)
Using data in /Users/Christiaan/github/cmerwich/participant-analysis/test/tf/c:
	repo clone offline under ~/github (local github)
	connecting to online GitHub repo cmerwich/bh-reference-system ... connected

In [8]:
# Load the feature g_prs in addition to the features that are already loaded;
# Replace() and the word-emitting code below read it through F.g_prs.
TF.load('g_prs', add=True)

  0.00s loading features ...
   |     0.00s Not enough info for structure in otext, structure functionality will not work
   |     0.16s B g_prs                from /Users/Christiaan/text-fabric-data/etcbc/bhsa/tf/c
  0.18s All additional features loaded - for details use loadLog()


In [9]:
class Mention:
    '''
    One mention that is to be written to a brat .ann file.

    All constructor arguments are stored as attributes of the same name.
    '''
    def __init__(self, name, start=0, end=0, lex='', note='', file=0, suffix=False):
        self.name = name     # Identifier of the mention, e.g. T32
        self.start = start   # Start of the position in the txt file
        self.end = end       # End of the position in the txt file
        self.lex = lex       # Lexical information of the mention
        self.suffix = suffix # Boolean for existence of suffix on word
        self.note = note     # AnnotatorNotes generated by MakeMentions()
        # Index of the .ann file the mention belongs to. PlaceMentions() and
        # PlaceCorefs() read it as m.file; it used to be dropped silently,
        # which made those functions fail with AttributeError.
        self.file = file

In [None]:
def OpenFiles(my_book_name, first_chapter, last_chapter):
    '''
    Select the chapter nodes of the requested book and chapter range and
    open a .txt, .ann and .tsv output file for them.

    my_book_name:  book name to select; a false value selects every book
    first_chapter: first chapter number of the range (inclusive)
    last_chapter:  last chapter number of the range (inclusive)

    Returns (txt_f, ann_f, tsv_f, chapter_list). The three file objects
    belong to the LAST selected chapter; files opened for earlier chapters
    are closed again, so no handle is leaked. When nothing matches, error()
    reports the problem and terminates.
    '''
    my_chapters = set(range(first_chapter, last_chapter + 1))
    chapter_list = []
    txt_f = ann_f = tsv_f = None

    for book in F.otype.s('book'):
        book_name = T.bookName(book)
        # Skip non-matching books and chapters. Calling error() here, as the
        # code did before, ended the run at the very first chapter that was
        # not requested.
        if my_book_name and book_name not in my_book_name:
            continue

        for chn in L.d(book, 'chapter'):
            chapter = F.chapter.v(chn)
            if my_chapters and chapter not in my_chapters:
                continue

            chapter_list.append(chn)

            # Only the most recently opened triple is returned: close the
            # handles that are about to be replaced.
            for superseded in (txt_f, ann_f, tsv_f):
                if superseded is not None:
                    superseded.close()

            # NOTE(review): `book` is the book node (a number), so file names
            # start with a node number - confirm this is intended and not
            # book_name.
            filename = f'{book}_{chapter:>03}'
            txt_f = open(f'{filename}.txt', 'w')
            ann_f = open(f'{filename}.ann', 'w')
            tsv_f = open(f'{filename}.tsv', 'w')

    if not chapter_list:
        error(f'The combination of {my_book_name} '
              f'and {first_chapter}--{last_chapter} does not exist. '
              'Try again.')

    return txt_f, ann_f, tsv_f, chapter_list

In [None]:
def MakeIndex(word, txtPos, g_word):
    '''
    Build the index entry of one word.

    Returns the tuple (word, start, end) in which start is txtPos and end is
    the position of the last character of g_word (txtPos + len(g_word) - 1).
    '''
    # Author notes (translated):
    # MakeIndex(texts): make an index table for the words of the txt files,
    # dict: key: node, value: tuple(begin index, end index).
    # Also make an index for Mention.suffix; see ReplaceSuffix() for hints.
    last = txtPos + len(g_word) - 1
    return (word, txtPos, last)

In [10]:
def Replace(g_prs):
    '''
    Reduce a g_prs value to its consonants.

    '' and '+' give None, '+A' gives 'A'; for any other value the characters
    + : @ . , ; A E I O U are removed and the remainder is returned (which
    can be the empty string).
    '''
    if g_prs in {'', '+'}:
        return None
    if g_prs == '+A':
        return 'A'
    dropped = frozenset('+:@.,;AEIOU')
    return ''.join(filter(lambda char: char not in dropped, g_prs))

In [11]:
def ReplaceSuffix(source_string, replace_what, replace_with):
    '''
    Replace the LAST occurrence of replace_what in source_string by
    replace_with.

    Returns (new_string, head), where head is the part of source_string
    before the replaced occurrence.

    Background: S.rpartition(sep) -> (head, sep, tail) searches for sep
    starting at the end of S. If sep is not found it returns two empty
    strings and S. In that case source_string is returned unchanged (head
    stays ''); without this guard replace_with would be glued in front of
    the word.
    '''

    head, found, tail = source_string.rpartition(replace_what)
    if not found:
        return source_string, head
    return head + replace_with + tail, head

In [23]:
def ProcessSuffix(gcons_word, new_prs):
    '''
    Mark the suffix of a word with a '+'.

    Without a suffix (new_prs is None or empty) the word is returned as is;
    otherwise ReplaceSuffix() replaces the last occurrence of new_prs in
    gcons_word by '+' followed by new_prs.
    '''
    if not new_prs:
        return gcons_word
    marked, _head = ReplaceSuffix(gcons_word, new_prs, f'+{new_prs}')
    return marked

def ProcessSpace(gcons_word, trailer, sep):
    '''
    Append the separator that follows a word in the output text.

    A word with an empty trailer gets '-' appended; any other word gets sep
    appended.
    '''
    if trailer == '':
        return f'{gcons_word}-'
    # The closing quote of this f-string was missing, which made the whole
    # cell a SyntaxError.
    return f'{gcons_word}{sep}'

def EmitWord(index_dict, w, sep, txtPos=0, words_list=None, txt_f=None, tsv_f=None):
    '''
    Emit one word: register it in the index and, when file objects are
    given, write it to the .txt and .tsv files.

    index_dict: dict that receives index_dict[w] = MakeIndex(w, txtPos, text)
    w:          word node
    sep:        separator to put after the word (see ProcessSpace)
    txtPos:     position in the .txt file at which the word starts
    words_list: optional list to which w is appended
    txt_f:      optional .txt file object; receives the word text
    tsv_f:      optional .tsv file object; receives one tab-separated row

    Returns the position directly after the emitted text, so the caller can
    keep its own running position. (Previously the function tried to update
    `txtPos` and `words_list` of its caller, which raised UnboundLocalError.)
    '''
    gcons_word = F.g_cons.v(w)
    trailer = F.trailer.v(w)
    new_prs = Replace(F.g_prs.v(w))

    # Chain the two steps: the separator is appended to the suffix-marked
    # word. Before, the result of ProcessSuffix() was thrown away.
    marked_word = ProcessSuffix(gcons_word, new_prs)
    g_word = ProcessSpace(marked_word, trailer, sep)

    if words_list is not None:
        words_list.append(w)
    # NOTE(review): as before, the indexed text includes the separator, so
    # the end index points at the separator character - confirm.
    index_dict[w] = MakeIndex(w, txtPos, g_word)

    if txt_f is not None:
        txt_f.write(g_word)
    if tsv_f is not None:
        # Offsets refer to the text as written to the .txt file (that is,
        # including a '+' suffix marker); the word column keeps g_cons.
        tsv_f.write(f'{txtPos}\t{txtPos + len(marked_word)}\t{w}\t{gcons_word}\n')

    return txtPos + len(g_word)

def WriteChapter(chapterNode, txt_f, tsv_f, index_dict=None):
    '''
    Write one chapter to the .txt file and its word table to the .tsv file.

    The .txt file gets a header line followed by one line per verse (verse
    number, space, words); the .tsv file gets a header row followed by one
    row per word. Verses without words are skipped.

    chapterNode: chapter node to write
    txt_f:       .txt file object
    tsv_f:       .tsv file object
    index_dict:  optional dict that is filled with one entry per word; a
                 private dict is used when omitted

    Returns (txt_f, words_list), words_list being the word nodes in the
    order in which they were written.
    '''
    book, chapter, _first_verse = T.sectionFromNode(chapterNode, fillup=True)
    # `filename` was undefined here. It is rebuilt with the pattern used in
    # OpenFiles(); NOTE(review): `book` is the section heading here, whereas
    # OpenFiles() uses the book node - confirm which one is wanted.
    filename = f'{book}_{chapter:>03}'
    if index_dict is None:
        index_dict = {}
    words_list = []

    txtPos = 0

    def writeT(text, fh):
        fh.write(text)

    def writeP(text, fh):
        # Write to the .txt file and keep track of the position in it.
        nonlocal txtPos
        fh.write(text)
        txtPos += len(text)

    writeP(f'{filename}\n', txt_f)

    header = ['start_index', 'end_index', 'word_node', 'word']
    writeT('{}\n'.format('\t'.join(header)), tsv_f)

    for vn in L.d(chapterNode, 'verse'):
        verse = T.sectionFromNode(vn)[2]
        verse_words = L.d(vn, 'word')
        if not verse_words:
            continue

        # write transcription and .tsv
        writeP(f'{verse} ', txt_f)

        last = len(verse_words) - 1
        for position, w in enumerate(verse_words):
            # Words are separated by a space, the verse ends with a newline.
            sep = '\n' if position == last else ' '
            txtPos = EmitWord(index_dict, w, sep, txtPos, words_list, txt_f, tsv_f)

    return txt_f, words_list

In [None]:
def WriteText(txt_f, tsv_f, chapter_list):
    '''
    Write the text and tsv output for every chapter node in chapter_list
    by calling WriteChapter() for each of them.

    Returns (text_f, chapter_list, words_list): the .txt file object, the
    chapter list as passed in, and the word nodes of ALL chapters in writing
    order. With an empty chapter_list nothing is written and words_list is
    empty (this used to fail on unbound variables; with several chapters
    only the words of the last one were returned).
    '''
    text_f = txt_f
    words_list = []
    for chn in chapter_list:
        text_f, chapter_words = WriteChapter(chn, txt_f, tsv_f)
        words_list.extend(chapter_words)

    return text_f, chapter_list, words_list

In [None]:
def GetOffsets(texts):
    '''
    Cumulative sizes (in bytes) of the given text files.

    The result starts with 0 and has one more element than texts: element i
    is the total size of the files before texts[i], the last element is the
    total size of all files.
    '''
    offsets = [0]
    for path in texts:
        offsets.append(offsets[-1] + os.stat(path).st_size)
    return offsets

In [None]:
def t(my_book_name, f, l):
    '''
    Collect chapter nodes.

    A chapter is selected when its book name occurs in my_book_name (a false
    my_book_name selects every book) and its chapter number lies in the
    range f..l inclusive (an empty range selects every chapter).
    Returns the selected chapter nodes as a list.
    '''
    wanted = set(range(f, l+1))
    selected = []

    for book in F.otype.s('book'):
        name = T.bookName(book)
        book_ok = not my_book_name or name in my_book_name

        for chn in L.d(book, 'chapter'):
            number = F.chapter.v(chn)
            chapter_ok = not wanted or number in wanted
            if book_ok and chapter_ok:
                selected.append(chn)

    return selected

# Chapter nodes of Psalms 1, used as input for the ParseMentions try-out below.
chapter_list = t('Psalms', 1, 1)

def ParseMentions(chapter_nodes):
    '''
    Exploration: for every mention node in the given chapters, look at the
    phrases that contain it and print the phrase type; for an NP phrase the
    word nodes of the phrase are printed as well.

    chapter_nodes: iterable of chapter nodes. (The function used to ignore
    this parameter and read the global chapter_list instead.)
    '''
    for chn in chapter_nodes:
        for m in L.d(chn, 'mention'):
            for phrase in L.u(m, 'phrase'):
                phr_type = F.typ.v(phrase)
                if phr_type == 'NP':
                    print('NP: ', L.d(phrase, 'word'))
                else:
                    print(phr_type)
                


# Try the exploration on the chapter nodes selected above.
ParseMentions(chapter_list)

In [10]:
def P(my_book_name, f, l):
    '''
    Print the phrase atoms of the selected chapters.

    Chapters are selected like in t(): book name in my_book_name, chapter
    number in f..l inclusive. For each phrase atom its text (format
    text-trans-plain) is printed, followed by one tab-separated line per
    word with the phrase atom's typ and the word's g_cons, st and pdp.
    '''
    wanted = set(range(f, l+1))
    for book in F.otype.s('book'):
        name = T.bookName(book)
        for chn in L.d(book, 'chapter'):
            number = F.chapter.v(chn)
            if my_book_name and name not in my_book_name:
                continue
            if wanted and number not in wanted:
                continue
            for pa in L.d(chn, 'phrase_atom'):
                pa_typ = F.typ.v(pa)
                print(T.text(pa, fmt='text-trans-plain'))
                for word in L.d(pa, 'word'):
                    print('\t', pa_typ, F.g_cons.v(word), F.st.v(word), F.pdp.v(word), sep='\t')
# Show the phrase atoms of Psalms 1.
P('Psalms', 1, 1)

>CRJ&H>JC 
		NP	>CRJ	c	subs
		NP	H	NA	art
		NP	>JC	a	subs
>CR05 
		CP	>CR	NA	conj
L> 
		NegP	L>	NA	nega
HLK 
		VP	HLK	NA	verb
B<YT RC<JM 
		PP	B	NA	prep
		PP	<YT	c	subs
		PP	RC<JM	a	subs
W
		CP	W	NA	conj
BDRK XV>JM 
		PP	B	NA	prep
		PP	DRK	c	subs
		PP	XV>JM	a	subs
L> 
		NegP	L>	NA	nega
<MD 
		VP	<MD	NA	verb
W
		CP	W	NA	conj
BMWCB LYJM 
		PP	B	NA	prep
		PP	MWCB	c	subs
		PP	LYJM	a	subs
L> 
		NegP	L>	NA	nega
JCB00 
		VP	JCB	NA	verb
KJ >M 
		CP	KJ	NA	conj
		CP	>M	NA	conj
BTWRT JHWH 
		PP	B	NA	prep
		PP	TWRT	c	subs
		PP	JHWH	a	nmpr
XPYW 
		NP	XPYW	a	subs
W
		CP	W	NA	conj
BTWRTW 
		PP	B	NA	prep
		PP	TWRTW	a	subs
JHGH 
		VP	JHGH	NA	verb
JWMM WLJLH00 
		AdvP	JWMM	NA	advb
		AdvP	W	NA	conj
		AdvP	LJLH	a	advb
W
		CP	W	NA	conj
HJH 
		VP	HJH	NA	verb
K<Y 
		PP	K	NA	prep
		PP	<Y	a	subs
CTWL 
		VP	CTWL	a	verb
<L&PLGJ MJM 
		PP	<L	NA	prep
		PP	PLGJ	c	subs
		PP	MJM	a	subs
>CR 
		CP	>CR	NA	conj
PRJW05 
		NP	PRJW	a	subs
JTN 
		VP	JTN	NA	verb
B<TW 
		PP	B	NA	prep
		PP	<TW	a	subs
W
		CP	W	NA	conj
<LHW 
		NP

In [129]:
def ReturnHead(source_string, replace_what, replace_with):
    '''
    Return the part of source_string before the LAST occurrence of
    replace_what.

    This is the first element of source_string.rpartition(replace_what), so
    the result is the empty string when replace_what does not occur.
    replace_with is accepted but not used.
    '''
    return source_string.rpartition(replace_what)[0]

In [170]:
def ParseMentions(my_book_name, f, l):
    '''
    Exploration of the mentions in the selected chapters.

    Chapters are selected by book name (my_book_name) and chapter range
    f..l inclusive. For every phrase that contains a mention node, the words
    of the phrase that lie in a mention are collected and handled by their
    number: exactly one, two, or more than two. Mention objects are built
    along the way, diagnostics are printed for the "more than two" case and
    three counters plus `t` are printed at the end.

    NOTE(review): there is no return statement, so the function returns
    None, and only suffixed one-word mentions are appended to `Mentions`.
    '''
    Mentions = []
    one_w_lst = []
    two_w_lst = []
    more_w_lst = []
    my_chapters = set(range(f, l+1))
    pdp_choice = {'verb', 'subs', 'nmpr', 'prps', 'prde'}
    t = 0   # counts two-word mentions in which both words carry a suffix
    for book in F.otype.s('book'):
        book_name = T.bookName(book)
        for chn in L.d(book, 'chapter'):
            chapter = F.chapter.v(chn)
            if (
                (my_book_name and book_name not in my_book_name)
                or 
                (my_chapters and chapter not in my_chapters)
            ):
                continue

            # Phrases / words with an nlink edge, and phrases containing a mention.
            np_nodes = tuple(n for n in L.d(chn, 'phrase') if E.nlink.t(n))
            word_nodes = tuple(n for n in L.d(chn, 'word') if E.nlink.t(n))
            phr_nodes = tuple(n for n in L.d(chn, 'phrase') if L.d(n, 'mention'))

            # Mention counter; restarts for every chapter.
            i = 0
            for p in phr_nodes:
                words = L.d(p, 'word')
                # NOTE(review): `words` is the whole result of L.d(), not a
                # single node; `pdp`, `typ` and `suf` are not used in this loop.
                pdp = F.pdp.v(words)
                typ = F.typ.v(p)
                #print(words, T.text(p), typ, sep='\t')
                mention_words = tuple(w for w in words if L.u(w, 'mention'))
                #prs = F.prs.v(L.d(p, 'word'))
                #prs = F.prs.v(mention_words[0])
                suf = F.mSuf.v(L.d(p, 'mention')[0])

                # NOTE(review): `<= 1` also admits an empty tuple, for which
                # mention_words[0] raises IndexError.
                if len(mention_words) <= 1:
                    word_node = mention_words[0]
                    new_prs = Replace(F.g_prs.v(word_node))
                    gcons_word = F.g_cons.v(word_node)

                    # if suffix
                    if new_prs:
                        i += 1
                        m = Mention(f'T{i}', word_node, word_node, new_prs, suffix = True)    
                        #print(new_prs, name, T.text(word, fmt='text-trans-plain'), sep='\t')
                        one_w_lst.append(word_node)
                        Mentions.append(m)
                    # if no suffix
                    else:
                        i += 1
                        # NOTE(review): this mention is created but not appended
                        # to Mentions, and the word is not counted in one_w_lst.
                        m = Mention(f'T{i}', word_node, word_node, gcons_word, suffix = False)    
                        #print('no prs:', name, T.text(mention_words[0], fmt='text-trans-plain'), sep='\t')

                # Exactly two mention words. NOTE(review): in every branch
                # below `m` is rebound for each Mention with the same name
                # T{i}, and none of them is appended to Mentions.
                elif len(mention_words) <= 2:
                    m0 = mention_words[0]
                    m1 = mention_words[1]
                    boo, chap, ver = T.sectionFromNode(m0)
                    ch_v = f'{chap}:{ver}'
                    new_prs0 = Replace(F.g_prs.v(m0))
                    new_prs1 = Replace(F.g_prs.v(m1))
                    pdp0 = F.pdp.v(m0)
                    pdp1 = F.pdp.v(m1)
                    gcons_word0 = F.g_cons.v(m0)
                    gcons_word1 = F.g_cons.v(m1)
                    #head0 = ReturnHead(gcons_word0, new_prs0, new_prs0)
                    #head1 = ReturnHead(gcons_word1, new_prs1, new_prs1)
                    st0 = F.st.v(m0)
                    #print(ch_v, m0, st0, gcons_word0)

                    #################### second word without suffix

                    if (st0 == 'a' and not new_prs0) and not new_prs1:
                        i += 1
                        m = Mention(f'T{i}', m0, m0, gcons_word0, suffix = False)
                        m = Mention(f'T{i}', m1, m1, gcons_word1, suffix = False)

                    elif (st0 == 'a' and new_prs0) and not new_prs1:
                        head0 = ReturnHead(gcons_word0, new_prs0, new_prs0)
                        i += 1
                        m = Mention(f'T{i}', m0, m0, head0, suffix = False)
                        m = Mention(f'T{i}', m0, m0, new_prs0, suffix = True)
                        m = Mention(f'T{i}', m1, m1, gcons_word1, suffix = False)
                        #print(ch_v, (head0, new_prs0), (gcons_word1), sep='\t')
                    #elif st0 == 'e' and not new_prs1:

                    elif st0 == 'c' and not new_prs1:
                        i += 1
                        # Here lex is the tuple of word nodes, not a string.
                        m = Mention(f'T{i}', m0, m1, mention_words, suffix = False)

                    elif st0 == 'NA' and not new_prs1:
                        i += 1
                        m = Mention(f'T{i}', m1, m1, gcons_word1, suffix = False)

                    #################### second word with suffix

                    elif (st0 == 'a' and not new_prs0) and new_prs1:
                        if pdp1 != 'prep':
                            i += 1
                            head1 = ReturnHead(gcons_word1, new_prs1, new_prs1)
                            m = Mention(f'T{i}', m0, m0, gcons_word0, suffix = False)
                            m = Mention(f'T{i}', m1, m1, head1, suffix = False)
                            m = Mention(f'T{i}', m1, m1, new_prs1, suffix = True)
                        else:
                            i += 1
                            head1 = ReturnHead(gcons_word1, new_prs1, new_prs1)
                            m = Mention(f'T{i}', m0, m0, gcons_word0, suffix = False)
                            m = Mention(f'T{i}', m1, m1, new_prs1, suffix = True)
                            #print(ch_v, (gcons_word0), pdp0, (head1, new_prs1), pdp1, sep='\t')
                    #elif st0 == 'e' and new_prs1:

                    elif st0 == 'c' and new_prs1:
                        i += 1
                        # possibly like previous:  if pdp1 != 'prep':
                        head1 = ReturnHead(gcons_word1, new_prs1, new_prs1)
                        m = Mention(f'T{i}', m0, m1, f'{gcons_word0} {head1}', suffix = False)
                        m = Mention(f'T{i}', m1, m1, new_prs1, suffix = True)
                        #print(ch_v, (f'{gcons_word0} {head1}'), (gcons_word1, new_prs1), pdp1, sep='\t')

                    elif (st0 == 'NA' and pdp0 in pdp_choice) and new_prs1:
                        i += 1
                        head1 = ReturnHead(gcons_word1, new_prs1, new_prs1)
                        m = Mention(f'T{i}', m0, m0, gcons_word0, suffix = False)
                        m = Mention(f'T{i}', m1, m1, head1, suffix = False)
                        m = Mention(f'T{i}', m1, m1, new_prs1, suffix = True)
                        #print(ch_v, (gcons_word0), pdp0, (head1, new_prs1), pdp1, sep='\t')

                    #################### both words with suffix

                    elif new_prs0 and new_prs1:
                        i += 1
                        t+=1
                        head0 = ReturnHead(gcons_word0, new_prs0, new_prs0)
                        head1 = ReturnHead(gcons_word1, new_prs1, new_prs1)
                        m = Mention(f'T{i}', m0, m0, head0, suffix = False)
                        m = Mention(f'T{i}', m0, m0, new_prs0, suffix = True)
                        m = Mention(f'T{i}', m1, m1, head1, suffix = False)
                        m = Mention(f'T{i}', m1, m1, new_prs1, suffix = True)
                        #print(ch_v, (head0, new_prs0), (head1, new_prs1), sep='\t')

                    ####################

                    # Counted whichever branch above was taken (or none).
                    two_w_lst.append(mention_words)    

                #len mention_words > 2
                else:
                    boo, chap, ver = T.sectionFromNode(mention_words[0])
                    ch_v = f'{chap}:{ver}'
                    for w in mention_words:
                        boo, chap, ver = T.sectionFromNode(w)
                        g_prs = F.g_prs.v(w)
                        new_prs = Replace(g_prs)
                        gcons_word = F.g_cons.v(w)
                        phr_atom = L.u(w, 'phrase_atom')[0]
                        rela = F.rela.v(phr_atom)

                        # Print every suffixed word of the mention.
                        if new_prs:
                            print(gcons_word)
                    more_w_lst.append(mention_words)

                    # `rela` is the value for the phrase atom of the LAST
                    # mention word; no Mention objects are built in this case.
                    print(ch_v, len(mention_words), 
                          T.text(mention_words, fmt='text-trans-plain'), rela, sep='\t')

            # The two loops below only compute values for the commented-out
            # prints; they have no effect at the moment.
            for npnode in np_nodes:
                wnodes = (L.d(npnode, 'word'))

                typ = F.typ.v(npnode)
                #print(npnode, wnodes, T.text(phrnode, fmt='text-trans-plain'), typ, sep='\t')

            for wnode in word_nodes:
                pdp = F.pdp.v(wnode)
                g_cons = F.g_cons.v(wnode) #T.text(wnode, fmt='text-trans-plain')

                #print(wnode, g_cons, pdp, sep='\t')

    # '1 w' counts suffixed one-word mentions only (see above).
    print('1 w: ', len(one_w_lst))
    print('2 w: ', len(two_w_lst))
    print('more w: ', len(more_w_lst))
    print(t)
# Run the exploration over Psalms, chapters 1 to 150.
ParseMentions('Psalms', 1, 150)

1:1	3	>CRJ&H>JC 	NA
QDCJ
2:6	3	YJWN HR&QDCJ00 	Appo
3:1	3	MZMWR LDWD 	Spec
BNW
3:1	3	PNJ05 >BCLWM BNW00 	Appo
4:1	3	MZMWR LDWD00 	Spec
DGNM
TJRWCM
4:8	3	DGNM WTJRWCM 	NA
5:1	3	MZMWR LDWD00 	Spec
MLKJ
5:3	3	MLKJ W>LHJ 	NA
5:6	3	KL&P<LJ >WN00 	NA
5:7	4	>JC&DMJM WMRMH 	NA
6:1	3	MZMWR LDWD00 	Spec
6:9	3	KL&P<LJ >WN 	NA
7:1	3	CGJWN LDWD 	Spec
7:1	4	DBRJ&KWC BN&JMJNJ00 	Appo
YDQJ
TMJ
7:9	3	YDQJ TMJ <LJ00 	Spec
7:10	3	LBWT WKLJWT 	NA
7:18	3	CM&JHWH <LJWN00 	NA
8:1	3	MZMWR LDWD00 	Spec
8:2	3	KL&H>RY 	NA
8:3	3	PJ <WLLJM05 JNQJM 	NA
8:3	3	>WJB WMTNQM00 	NA
CMJK
>YB<TJK
8:4	3	CMJK M<FJ >YB<TJK 	Appo
8:4	3	JRX WKWKBJM 	NA
8:6	3	KBWD WHDR 	NA
KLM
8:8	4	YNH W>LPJM KLM 	Spec
8:8	3	GM BHMWT FDJ00 	NA
8:9	3	DGJ HJM 	NA
8:10	3	KL&H>RY00 	NA
9:1	4	<LMWT LBN 	Spec
9:1	3	MZMWR LDWD00 	Spec
MCPVJ
DJNJ
9:5	3	MCPVJ WDJNJ 	NA
9:10	4	MFGB LDK 	Spec
9:10	3	<TWT YRH00 	Spec
9:15	3	C<RJ BT&YJWN 	NA
10:1	3	<TWT YRH00 	Spec
10:7	3	MRMWT WTK 	NA
10:7	3	<ML W>WN00 	NA
10:14	3	<ML WK<S05 	NA
10:16	4	MLK <WLM W<D 	NA
10

72:15	3	KL&HJWM 	NA
72:16	3	<FB H>RY00 	NA
72:18	4	JHWH >LHJM >LHJ JFR>L 	Appo
72:19	3	KL H>RY 	NA
72:20	4	TPLWT DWD BN&JCJ00 	Appo
73:1	3	MZMWR L>SP 	Spec
73:14	3	KL&HJWM 	NA
C>RJ
LBBJ
73:26	3	C>RJ WLBBJ 	NA
LBBJ
XLQJ
73:26	4	YWR&LBBJ WXLQJ 	NA
74:1	3	MFKJL L>SP 	Spec
NXLTK
74:2	4	CBV NXLTK HR&YJWN 	Appo
74:8	3	KL&MW<DJ&>L 	NA
JDK
JMJNK
74:11	3	JDK WJMJNK 	NA
74:12	3	QRB H>RY00 	NA
74:15	3	M<JN WNXL 	NA
74:16	3	M>WR WCMC00 	NA
74:17	3	KL&GBWLWT >RY 	NA
74:17	3	QJY WXRP 	NA
74:21	3	<NJ W>BJWN 	NA
74:22	3	KL&HJWM00 	NA
75:1	3	MZMWR L>SP 	Spec
JCBJH
75:4	4	>RY WKL&JCBJH 	NA
75:9	3	KL RC<J&>RY00 	NA
75:11	3	KL&QRNJ RC<JM 	NA
76:1	3	MZMWR L>SP 	Spec
76:4	7	RCPJ&QCT MGN WXRB WMLXMH 	NA
76:6	3	KL&>NCJ&XJL 	NA
76:7	3	RKB WSWS00 	NA
76:10	3	KL&<NWJ&>RY 	NA
77:6	5	JMJM MQDM CNWT <WLMJM00 	Para
<MK
77:16	5	<MK BNJ&J<QB WJWSP 	Appo
77:21	3	JD&MCH >HRN00 	NA
78:1	3	MFKJL L>SP 	Spec
78:2	3	XJDWT MNJ&QDM00 	Spec
<ZWZW
NPL>WTJW
78:4	6	THLWT JHWH W<ZWZW WNPL>WTJW 	NA
>BWTM
78:8	3	>BWTM DWR DWR 	Spec
7

132:10	3	<BWR DWD <BDK 	Appo
BRJTJ
<DTJ
132:12	3	BRJTJ W<DTJ 	NA
133:1	3	CJR HM<LWT 	NA
133:2	6	CMN HVWB05 HR>C 	Spec
133:2	4	HZQN ZQN&>HRN 	Appo
133:3	4	XJJM <D&H<WLM00 	Spec
134:1	3	CJR HM<LWT 	NA
134:1	3	KL&<BDJ JHWH 	NA
134:3	3	CMJM W>RY00 	NA
>LHJNW
135:2	6	BJT JHWH BXYRWT BJT >LHJNW00 	Appo
135:6	8	CMJM >RY JMJM KL&THWMWT00 	NA
135:7	3	QYH H>RY 	NA
135:9	3	>TWT WMPTJM 	NA
<BDJW
135:9	3	PR<H KL&<BDJW00 	NA
135:11	11	SJXWN05 MLK H>MRJ <WG MLK HBCN KL MMLKWT KN<N00 	Para
135:15	3	<YBJ HGWJM 	NA
135:15	3	KSP WZHB 	NA
135:15	3	M<FH JDJ >DM00 	NA
135:20	3	BJT HLWJ 	NA
136:2	3	>LHJ H>LHJM 	NA
136:3	3	>DNJ H>DNJM 	NA
136:8	3	MMCLT JWM 	Spec
136:9	3	HJRX KWKBJM 	NA
136:9	3	MMCLWT LJLH 	Spec
XJLW
136:15	3	PR<H WXJLW 	NA
136:19	4	SJXWN MLK H>MRJ 	Appo
136:20	4	<WG MLK HBCN 	Appo
136:26	3	>L HCMJM 	NA
BH
137:7	3	HJSWD BH00 	Spec
138:4	3	KL&MLKJ&>RY 	NA
140:1	3	MZMWR LDWD00 	Spec
JCW<TJ
140:8	4	JHWH >DNJ <Z JCW<TJ 	Appo
140:12	3	>JC&XMS R< 	NA
140:13	4	DJN <NJ MCPV >BJNJM00 	Spec
141:1	3	MZMW

In [None]:
def PlaceMentions(mentions, file_table):
    '''
    Write every mention to the .ann file object it belongs to.

    The file object is looked up as file_table[m.file]. Each mention becomes
    one tab-separated line: an identifier, 'Mention <start> <end>' and the
    lex value. The identifiers are numbered T1, T2, ... in the order of
    mentions; Mention.name is not used here.
    '''
    for i, m in enumerate(mentions, start=1):
        ann_line = f'T{str(i)}{TAB}Mention {str(m.start)} {str(m.end)}{TAB}{m.lex}{NL}'
        file_table[m.file].write(ann_line)
        #(f'{m.name}{TAB}Mention {str(m.start)} {str(m.end)}{TAB}{m.lex}{NL}')

In [None]:
def WriteCoref(mention_list, file_table):
    '''
    Write one coreference class to an .ann file object.

    The line consists of '*', a tab and 'Coreference', followed by every
    element of mention_list preceded by a space, and ends with a newline.
    file_table is the file object to write to.
    '''
    pieces = [f'*{TAB}Coreference']
    pieces.extend(f' {m}' for m in mention_list)
    pieces.append('\n')
    for piece in pieces:
        file_table.write(piece)

In [None]:
def PlaceCorefs(corefs, file_table):
    '''
    Distribute the coreference classes over the .ann files.

    For each class a table with one (initially empty) list per .ann file is
    made, and the name of every mention is put in the list of the file the
    mention belongs to (m.file). A list that ends up with more than one name
    is written to its file with WriteCoref().
    '''
    for coref in corefs:
        names_per_file = [[] for _ in range(len(file_table))]
        for mention in coref:
            names_per_file[mention.file].append(mention.name)
        for file_no, names in enumerate(names_per_file):
            if len(names) > 1:
                WriteCoref(names, file_table[file_no])

In [None]:
def CloseAnns(ann_file_table):
    '''
    Close each file object in ann_file_table.
    '''
    for ann_f in ann_file_table:
        ann_f.close()

In [None]:
def CreateCoref(my_book_name, first_chapter, last_chapter):
    '''
    Produce the three output files (.txt, .ann, .tsv) for a chapter range.

    Steps: open the files, write the text and the word table, collect the
    mentions, resolve them into coreference classes, write mentions and
    classes to the .ann file. All files are closed, also when a step fails.

    NOTE(review): ParseMentions() does not return a list of mentions yet and
    ResolveCoref() is not defined in the part of the notebook shown here;
    this function only wires the steps together with consistent names.
    '''
    txt_f, ann_f, tsv_f, chapter_list = OpenFiles(my_book_name, first_chapter, last_chapter)
    # PlaceMentions()/PlaceCorefs() select a file by m.file, so the single
    # .ann file is wrapped in a one-element table.
    # (Author note, translated: remove ann_file_table, it is not needed.)
    ann_files = [ann_f]
    try:
        # WriteText() returns a triple; it was assigned to words_list as a whole.
        txt_f, chapter_list, words_list = WriteText(txt_f, tsv_f, chapter_list)

        # TODO (author notes, translated): make an index table for the words
        # of the txt files, dict: key: node, value: tuple(begin index, end
        # index). Also make an index for Mention.suffix; see ReplaceSuffix()
        # for hints.
        # The former call MakeIndex(txt_f, words_list) did not match
        # MakeIndex(word, txtPos, g_word) and raised TypeError; the per-word
        # index is built while the words are written.

        # Should become a list of Mention objects. The call uses the
        # signature of the ParseMentions() that is defined last.
        mentions = ParseMentions(my_book_name, first_chapter, last_chapter)

        # Author notes, translated:
        # as in ctt: '-' as separator between words, space between word groups
        # '+' as separator between suffix and noun (subs)
        # index the txt files that WriteTxt() makes this way for m.start and
        # m.end, and not the TF text string as is done now
        # +suffix: use g_cons and g_prs --> consonant string, match the end of the word

        corefs = ResolveCoref(mentions)

        PlaceMentions(mentions, ann_files)
        PlaceCorefs(corefs, ann_files)
    finally:
        CloseAnns(ann_files)
        txt_f.close()
        tsv_f.close()

# 3 output files: .txt, .ann, .tsv

In [None]:
def main(argv):
    '''
    Command-line entry point.

    argv holds the arguments without the program name:
    book_name first_chapter [last_chapter]. Without last_chapter only
    first_chapter is processed. On a wrong number of arguments, an unknown
    option or a chapter that is not a number, the usage message is shown and
    the program exits.
    '''
    try:
        # '-v' is accepted but has no effect (yet).
        _opts, args = getopt.getopt(argv, 'v', [])
    except getopt.GetoptError:
        Usage()

    if len(args) not in (2, 3):
        Usage()

    book_name = args[0]
    try:
        first_chapter = int(args[1])
        # args[-1] is the first chapter again when no last chapter is given.
        last_chapter = int(args[-1])
    except ValueError:
        Usage()

    CreateCoref(book_name, first_chapter, last_chapter)

# When run as a script: hand the command-line arguments (without the program
# name) to main().
if __name__ == "__main__":
    main(argv[1:])

In [None]:
def replace_suffix(source_string, replace_what, replace_with):
    '''
    Replace the LAST occurrence of replace_what in source_string by
    replace_with.

    Returns (new_string, head), head being the part before the replaced
    occurrence. When replace_what does not occur, rpartition() returns
    ('', '', source_string); source_string is then returned unchanged (head
    stays '') instead of getting replace_with glued in front of it.
    '''
    head, found, tail = source_string.rpartition(replace_what)
    if not found:
        return source_string, head
    return head + replace_with + tail, head

def replace(g_prs):
    '''
    Strip a g_prs value down to its consonants.

    Returns None for '' and '+', 'A' for '+A', and otherwise g_prs without
    the characters + : @ . , ; A E I O U (possibly the empty string).
    '''
    if g_prs in {'', '+'}:
        return None
    if g_prs == '+A':
        return 'A'
    skipped = frozenset('+:@.,;AEIOU')
    kept = [char for char in g_prs if char not in skipped]
    return ''.join(kept)

def print_text(book, chapter):
    '''
    Print the suffixed words of one chapter.

    For every word whose consonantal form (g_cons) ends with its reduced
    suffix (see replace()), two values are printed: the word with '+' in
    front of the suffix and a terminator behind it ('-' when the trailer is
    empty, a space otherwise), and the part of the word before the suffix.

    The unused work of the earlier version (the verse text, the prs feature
    and the separator-only strings) has been removed; the output is the same.
    '''
    chn = T.nodeFromSection((book, chapter))
    for vn in L.d(chn, 'verse'):
        for w in L.d(vn, 'word'):
            gcons_word = F.g_cons.v(w)
            new_prs = replace(F.g_prs.v(w))
            if not new_prs or not gcons_word.endswith(new_prs):
                continue

            terminator = '-' if F.trailer.v(w) == '' else ' '
            word, head = replace_suffix(gcons_word, new_prs, f'+{new_prs}{terminator}')
            print(word, head)
                
# Show the suffixed words of Psalm 119.
print_text('Psalms', 119)

In [None]:
# Replacement table keyed by trailer strings; presumably the keys are values
# of the word feature `trailer` and the values the separators to write
# instead - TODO confirm. No function in this notebook reads it yet; only a
# commented-out hint in print_text() refers to it.
trailer_dict = {' ': ' ',
    '': '-',
    '&': '-',
    '00 ': ' *',
    '05 ': ' ',
    '00_S ': ' *',
    '00_P ': ' *',
    '_S ': ' *',
    '00_| ': ' *',
    '00_|_P ': ' *',
    '00_|_S ': ' *',
    }
trailer_dict


# The string below is a pseudo-code sketch, not executable logic: an outer
# function that loops over the chapters f..l and calls a per-chapter function.
'''f1(b, f, l):
    for c in range(f, l+1):
        do with chapter
        f2(b, c):'''