# Tischendorf MorphGNT to Text-Fabric

In [1]:
import os
import re
from glob import glob
from tf.fabric import Fabric
from tf.convert.walker import CV

## Path Configs

In [89]:
# template for the Text-Fabric output location; {version} is filled in per converted version
output_dirs = '../tf/{version}'

# local clone of the tischendorf-data repository
source_repo = os.path.expanduser('~/github/tischendorf-data')

# glob pattern matching every entry below the word-per-line directory
source_dirs = os.path.join(source_repo, 'word-per-line/*')

## Get Latest Source Data

Pull the latest Tischendorf data from the [source GitHub repository](https://github.com/morphgnt/tischendorf-data).

In [3]:
!cd $source_repo; git pull origin master

From https://github.com/morphgnt/tischendorf-data
 * branch            master     -> FETCH_HEAD
Already up to date.


## Select Versions

At this stage, I will seek to convert the latest version of Tischendorf.

In [4]:
def _version_key(path):
    '''
    Sort key for a version directory.

    Orders by the numeric components of the directory name so that
    e.g. 2.10 sorts after 2.8 (a plain string sort would put it first);
    the path itself is the tie-breaker.
    '''
    numbers = [int(part) for part in re.findall(r'\d+', os.path.basename(path))]
    return (numbers, path)

versions = sorted(glob(source_dirs), key=_version_key)

# fail with a clear message instead of an IndexError when nothing was found
if not versions:
    raise FileNotFoundError(f'no version directories found matching {source_dirs}')

version_dirs = [versions[-1]] # convert latest version only for now

## Processing Data

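Lookup data needed to process the text: a mapping from the book abbreviations used in the source file names to full book names, and a regular expression that parses `chapter:verse.word` references.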

In [141]:
# Maps the book abbreviation used in the source file names to the full book name.
# The insertion order is the order in which the books are processed.
bo2book = dict(line.split() for line in '''
MT Matthew
MR Mark
LU Luke
JOH John
AC Acts
RO Romans
1CO 1_Corinthians
2CO 2_Corinthians
GA Galatians
EPH Ephesians
PHP Philippians
COL Colossians
1TH 1_Thessalonians
2TH 2_Thessalonians
1TI 1_Timothy
2TI 2_Timothy
TIT Titus
PHM Philemon
HEB Hebrews
JAS James
1PE 1_Peter
2PE 2_Peter
1JO 1_John
2JO 2_John
3JO 3_John
JUDE Jude
RE Revelation
'''.split('\n') if line)

# 'section' parses a reference of the form chapter:verse.word into three groups.
# Raw string: `\d` in a normal string literal is an invalid escape sequence.
patts = {'section': re.compile(r'(\d*):(\d*)\.(\d*)')}

## Write a CV Walk Director to Process the Text

See the documentation for the CV walk class [here](https://annotation.github.io/text-fabric/Create/Convert/).

In [130]:
def director(cv):
    '''
    Walks through Tischendorf and triggers
    slot and node creation events.

    For every book in `bo2book` the file `Unicode/<abbreviation>.txt`
    below `version_loc` is read. Each non-empty line yields one word
    slot; book, chapter, verse and paragraph nodes are opened and
    terminated around the slots.

    Relies on names defined elsewhere in the notebook: `bo2book`,
    `patts` and `version_loc` (the latter is the loop variable of the
    conversion loop).

    Parameters
    ----------
    cv
        The walker object passed in by `cv.walk`. Only its `node`,
        `slot`, `feature` and `terminate` methods are used here.
    '''

    # process books in order
    for bo, book in bo2book.items():

        book_loc = os.path.join(version_loc, f'Unicode/{bo}.txt')

        print(f'\thandling {book_loc}...')

        # the files hold Greek text; decode explicitly as UTF-8 rather than
        # relying on the platform's default encoding
        with open(book_loc, 'r', encoding='utf-8') as infile:
            text = [w for w in infile.read().split('\n') if w]

        this_book = cv.node('book')
        cv.feature(this_book, book=book)

        # keep track of when to trigger paragraph, chapter, and verse objects
        para_track = 1 # keep counts of paragraphs
        prev_chap = 1 # start at 1
        prev_verse = 1 # start at 1
        this_chap = cv.node('chapter')
        this_para = cv.node('paragraph')
        this_verse = cv.node('verse')

        # iterate through words and construct objects
        for word in text:

            data = word.split()
            word_data, lemmas = data[:7], data[7:]

            # segment out word data
            bo_code, ref, brake, ketiv, qere, morph, strongs = word_data

            # reconstitute lemmas and split on !
            # strip so that whitespace around the separator does not end up
            # inside the lemma values
            strongs_lemma, anlex_lemma = [
                lemma.strip() for lemma in ' '.join(lemmas).split('!')
            ]

            chapt, verse, wrdnum = [int(v) for v in patts['section'].match(ref).groups()]

            # -- handle TF events --

            # detect chapter boundary
            if prev_chap != chapt:

                # end verse
                cv.feature(this_verse, verse=prev_verse)
                cv.terminate(this_verse)

                # end chapter
                cv.feature(this_chap, chapter=prev_chap)
                cv.terminate(this_chap)

                # new chapter and verse begin
                this_chap = cv.node('chapter')
                prev_chap = chapt
                this_verse = cv.node('verse')
                prev_verse = verse

            # detect verse boundary
            elif prev_verse != verse:
                cv.feature(this_verse, verse=prev_verse)
                cv.terminate(this_verse)
                this_verse = cv.node('verse') # start a new verse
                prev_verse = verse

            # detect paragraph boundary
            if brake == 'P':
                cv.feature(this_para, para=para_track)
                cv.terminate(this_para)
                this_para = cv.node('paragraph') # start a new paragraph
                para_track += 1 # count paragraphs in the book

            # make word object
            this_word = cv.slot()
            cv.feature(this_word,
                       ketiv=ketiv,
                       qere=qere,
                       morph=morph,
                       strongs=strongs,
                       vrsnum=wrdnum,
                       str_lem=strongs_lemma,
                       anlex_lem=anlex_lemma
                      )
            cv.terminate(this_word)

        # end book and its objects
        # - end verse
        cv.feature(this_verse, verse=prev_verse)
        cv.terminate(this_verse)

        # - end paragraph
        cv.feature(this_para, para=para_track)
        cv.terminate(this_para)

        # - end chapter
        cv.feature(this_chap, chapter=prev_chap)
        cv.terminate(this_chap)

        # - end book (its `book` feature was already set when the node was created)
        cv.terminate(this_book)

## Make the Conversion

### Corpus and TF Feature Metadata

In [142]:
# node type of the slots
slotType = 'word'

# text format and section configuration
otext = {'fmt:text-orig-full':'{qere} ',
         'sectionTypes':'book,chapter,verse',
         'sectionFeatures':'book,chapter,verse'}

# metadata shared by all features
generic = {'Name': 'Morph-GNT Tischendorf',
           'Version': None, # to be filled in
           'Authors': 'G. Clint Yale Tischendorf and Maurice A. Robinson',
           'Editor': 'Ulrik Sandborg-Petersen',
           'Converter': 'Cody Kingham',
           'Source': 'https://github.com/morphgnt/tischendorf-data/', # key had a stray trailing colon
           'Note':'Feature descriptions adapted from tischendorf-data README'}

# features whose values are integers; vrsnum is included because the
# director passes it as an int (the word number parsed from the reference)
intFeatures = {'chapter', 'para', 'verse', 'vrsnum'}

# per-feature metadata; every entry uses the key 'description'
featureMeta = {'book': {'description': 'A book name'},
               'chapter': {'description': 'A chapter number'},
               'verse': {'description': 'A verse number'},
               'para': {'description': 'A paragraph number'},
               'ketiv': {'description': 'The text as it is written in the printed Tischendorf'},
               'qere': {'description': 'The text as the editor thinks it should have been'},
               'morph': {'description': 'Word morphological tag based on Maurice A Robinson\'s analysis'},
               'strongs': {'description': 'A word\'s number in Strongs'},
               'vrsnum': {'description': 'N-word in verse'},
               'str_lem': {'description': 'Word lemma that corresponds to The NEW Strong\'s Complete Dictionary of Bible Words'},
               'anlex_lem': {'description': 'Word lemma that corresponds to Friberg, Friberg and Miller\'s ANLEX'}
              }

### Conversion

In [131]:
# Convert every selected version. `version_loc` is deliberately the loop
# variable: the director reads it as a global to find the source files.
for version_loc in version_dirs:

    # configure metadata/output
    version = os.path.basename(version_loc)
    generic['Version'] = version

    print(f'Processing Version {version}')
    # (a previous line here read `output_dir` before it was assigned, which
    # raises NameError on a fresh kernel; its result was never used)
    output_dir = output_dirs.format(version=version)

    TF = Fabric(locations=output_dir, silent=True)
    cv = CV(TF)

    good = cv.walk(director,
                   slotType,
                   otext=otext,
                   generic=generic,
                   intFeatures=intFeatures,
                   featureMeta=featureMeta,
                   warn=True,
                   force=False,)

    # make a failed conversion visible instead of silently discarding the result
    if not good:
        print(f'Conversion of version {version} did not complete successfully')

Processing Version 2.8
  0.00s Importing data from walking through the source ...
   |     0.00s Preparing metadata... 
   |     0.00s No structure nodes will be set up
   |   SECTION   TYPES:    book, chapter, verse
   |   SECTION   FEATURES: book, chapter, verse
   |   STRUCTURE TYPES:    
   |   STRUCTURE FEATURES: 
   |   TEXT      FEATURES:
   |      |   text-orig-full       qere
   |     0.02s OK
   |     0.00s Following director... 
	handling /Users/cody/github/tischendorf-data/word-per-line/2.8/Unicode/MT.txt...
	handling /Users/cody/github/tischendorf-data/word-per-line/2.8/Unicode/MR.txt...
	handling /Users/cody/github/tischendorf-data/word-per-line/2.8/Unicode/LU.txt...
	handling /Users/cody/github/tischendorf-data/word-per-line/2.8/Unicode/JOH.txt...
	handling /Users/cody/github/tischendorf-data/word-per-line/2.8/Unicode/AC.txt...
	handling /Users/cody/github/tischendorf-data/word-per-line/2.8/Unicode/RO.txt...
	handling /Users/cody/github/tischendorf-data/word-per-line/2.8

## Test Load

In [145]:
# features to load from the converted dataset
features_to_load = '''

book chapter verse para
ketiv

'''

# open the dataset written by the conversion loop (uses its last `output_dir`)
TF = Fabric(locations=output_dir)
api = TF.load(features_to_load)

# make the API handles available as notebook globals
classes = api.makeAvailableIn(globals())

This is Text-Fabric 7.8.5
Api reference : https://annotation.github.io/text-fabric/Api/Fabric/

14 features found and 0 ignored
  0.00s loading features ...
   |     0.00s No structure info in otext, the structure part of the T-API cannot be used
   |     0.00s T para                 from /Users/cody/github/tischendorf_tf/tf/2.8
  0.27s All features loaded/computed - for details use loadLog()


In [146]:
# book node used for the inspection cells
show_book = T.nodeFromSection(('1_John',))

# print every verse of the book: the reference on one line, the text on the next
for verse_node in L.d(show_book, 'verse'):
    section = T.sectionFromNode(verse_node)
    print('{} {}:{}'.format(*section))
    print('\t\t', T.text(verse_node))

1_John 1:1
		 Ὃ ἦν ἀπ’ ἀρχῆς, ὃ ἀκηκόαμεν, ὃ ἑωράκαμεν τοῖς ὀφθαλμοῖς ἡμῶν, ὃ ἐθεασάμεθα καὶ αἱ χεῖρες ἡμῶν ἐψηλάφησαν, περὶ τοῦ λόγου τῆς ζωῆς 
1_John 1:2
		 καὶ ἡ ζωὴ ἐφανερώθη, καὶ ἑωράκαμεν καὶ μαρτυροῦμεν καὶ ἀπαγγέλλομεν ὑμῖν τὴν ζωὴν τὴν αἰώνιον ἥτις ἦν πρὸς τὸν πατέρα καὶ ἐφανερώθη ἡμῖν 
1_John 1:3
		 ὃ ἑωράκαμεν καὶ ἀκηκόαμεν ἀπαγγέλλομεν καὶ ὑμῖν, ἵνα καὶ ὑμεῖς κοινωνίαν ἔχητε μεθ’ ἡμῶν. καὶ ἡ κοινωνία δὲ ἡ ἡμετέρα μετὰ τοῦ πατρὸς καὶ μετὰ τοῦ υἱοῦ αὐτοῦ Ἰησοῦ Χριστοῦ. 
1_John 1:4
		 καὶ ταῦτα γράφομεν ἡμεῖς ἵνα ἡ χαρὰ ἡμῶν ᾖ πεπληρωμένη. 
1_John 1:5
		 Καὶ ἔστιν αὕτη ἡ ἀγγελία ἣν ἀκηκόαμεν ἀπ’ αὐτοῦ καὶ ἀναγγέλλομεν ὑμῖν, ὅτι ὁ θεὸς φῶς ἐστιν καὶ σκοτία ἐν αὐτῷ οὐκ ἔστιν οὐδεμία. 
1_John 1:6
		 ἐὰν εἴπωμεν ὅτι κοινωνίαν ἔχομεν μετ’ αὐτοῦ καὶ ἐν τῷ σκότει περιπατῶμεν, ψευδόμεθα καὶ οὐ ποιοῦμεν τὴν ἀλήθειαν· 
1_John 1:7
		 ἐὰν δὲ ἐν τῷ φωτὶ περιπατῶμεν ὡς αὐτός ἐστιν ἐν τῷ φωτί, κοινωνίαν ἔχομεν μετ’ ἀλλήλων καὶ τὸ αἷμα Ἰησοῦ τοῦ υἱοῦ αὐτοῦ καθαρίζει ἡμᾶς ἀπὸ πάσης ἁμαρτίας. 


In [157]:
# spot-check: look up the section tuple of one arbitrary node number
T.sectionFromNode(100535)

('2_Corinthians', 11, 17)

In [163]:
# the book name is the same for every paragraph, so look it up once
book_name = F.book.v(show_book)

# print each paragraph of the book as a heading followed by its verses on one line
for para_node in L.d(show_book, 'paragraph'):
    print(book_name, '§', F.para.v(para_node))

    for verse_node in L.d(para_node, 'verse'):
        _, chapter, vrs = T.sectionFromNode(verse_node)
        verse_text = T.text(verse_node)
        print(f'({chapter}:{vrs}) {verse_text}', end='')
    print('\n')

1_John § 1
(1:1) Ὃ ἦν ἀπ’ ἀρχῆς, ὃ ἀκηκόαμεν, ὃ ἑωράκαμεν τοῖς ὀφθαλμοῖς ἡμῶν, ὃ ἐθεασάμεθα καὶ αἱ χεῖρες ἡμῶν ἐψηλάφησαν, περὶ τοῦ λόγου τῆς ζωῆς (1:2) καὶ ἡ ζωὴ ἐφανερώθη, καὶ ἑωράκαμεν καὶ μαρτυροῦμεν καὶ ἀπαγγέλλομεν ὑμῖν τὴν ζωὴν τὴν αἰώνιον ἥτις ἦν πρὸς τὸν πατέρα καὶ ἐφανερώθη ἡμῖν (1:3) ὃ ἑωράκαμεν καὶ ἀκηκόαμεν ἀπαγγέλλομεν καὶ ὑμῖν, ἵνα καὶ ὑμεῖς κοινωνίαν ἔχητε μεθ’ ἡμῶν. καὶ ἡ κοινωνία δὲ ἡ ἡμετέρα μετὰ τοῦ πατρὸς καὶ μετὰ τοῦ υἱοῦ αὐτοῦ Ἰησοῦ Χριστοῦ. (1:4) καὶ ταῦτα γράφομεν ἡμεῖς ἵνα ἡ χαρὰ ἡμῶν ᾖ πεπληρωμένη. 

1_John § 2
(1:5) Καὶ ἔστιν αὕτη ἡ ἀγγελία ἣν ἀκηκόαμεν ἀπ’ αὐτοῦ καὶ ἀναγγέλλομεν ὑμῖν, ὅτι ὁ θεὸς φῶς ἐστιν καὶ σκοτία ἐν αὐτῷ οὐκ ἔστιν οὐδεμία. (1:6) ἐὰν εἴπωμεν ὅτι κοινωνίαν ἔχομεν μετ’ αὐτοῦ καὶ ἐν τῷ σκότει περιπατῶμεν, ψευδόμεθα καὶ οὐ ποιοῦμεν τὴν ἀλήθειαν· (1:7) ἐὰν δὲ ἐν τῷ φωτὶ περιπατῶμεν ὡς αὐτός ἐστιν ἐν τῷ φωτί, κοινωνίαν ἔχομεν μετ’ ἀλλήλων καὶ τὸ αἷμα Ἰησοῦ τοῦ υἱοῦ αὐτοῦ καθαρίζει ἡμᾶς ἀπὸ πάσης ἁμαρτίας. (1:8) ἐὰν εἴπωμεν ὅτι ἁμαρτίαν οὐκ ἔχομ