# CCAT to TF Conversion

In this notebook I extract the CCAT data from its plain text, online format and process it into a Text-Fabric data format. 



In [1]:
import os, sys, requests, time, glob, collections, datetime
from tf.fabric import Fabric

## Harvest Data Files from ccat.sas.upen.edu

N.B. that there is a 2 second delay between each page (book) request to avoid stressing the server.

In [2]:
# harvests the data from CCAT website; toggle False if already done
request_new_data = False

# data paths 
source_directory = 'source'
tf_directory = 'tf'
morph_dir = os.path.join(source_directory, 'morphology')
paral_dir = os.path.join(source_directory, 'parallel')


morph_url = 'http://ccat.sas.upenn.edu/gopher/text/religion/biblical/lxxmorph/{}'
paral_url = 'http://ccat.sas.upenn.edu/gopher/text/religion/biblical/parallel/{}'

# pasted from http://ccat.sas.upenn.edu/gopher/text/religion/biblical/lxxmorph
morph_books = '''
[   ]	01.Gen.1.mlxx	12-Jul-1994 15:10	711K
[   ]	02.Gen.2.mlxx	12-Jul-1994 15:11	673K
[   ]	03.Exod.mlxx	12-Jul-1994 15:05	1.0M
[   ]	04.Lev.mlxx	03-Aug-2015 15:13	812K
[   ]	05.Num.mlxx	10-Aug-2015 16:07	1.0M
[   ]	06.Deut.mlxx	06-Aug-2015 15:45	1.0M
[   ]	07.JoshB.mlxx	12-Jul-1994 15:55	638K
[   ]	08.JoshA.mlxx	12-Jul-1994 16:09	46K
[   ]	09.JudgesB.mlxx	12-Jul-1994 16:10	667K
[   ]	10.JudgesA.mlxx	12-Jul-1994 16:11	683K
[   ]	11.Ruth.mlxx	12-Jul-1994 16:12	88K
[   ]	12.1Sam.mlxx	04-Aug-2015 16:39	862K
[   ]	13.2Sam.mlxx	04-Aug-2015 16:43	766K
[   ]	14.1Kings.mlxx	06-Aug-2015 15:30	888K
[   ]	15.2Kings.mlxx	12-Jul-1994 16:41	807K
[   ]	16.1Chron.mlxx	12-Jul-1994 16:41	692K
[   ]	17.2Chron.mlxx	03-Aug-2015 16:30	910K
[   ]	18.1Esdras.mlxx	12-Jul-1994 16:58	387K
[   ]	19.2Esdras.mlxx	19-Jul-1994 12:45	568K
[   ]	20.Esther.mlxx	12-Jul-1994 16:59	251K
[   ]	21.Judith.mlxx	12-Jul-1994 16:59	392K
[   ]	22.TobitBA.mlxx	13-Jul-1994 09:41	236K
[   ]	23.TobitS.mlxx	13-Jul-1994 09:41	308K
[   ]	24.1Macc.mlxx	13-Jul-1994 09:42	791K
[   ]	25.2Macc.mlxx	03-Aug-2015 16:50	519K
[   ]	26.3Macc.mlxx	13-Jul-1994 09:55	223K
[   ]	27.4Macc.mlxx	19-Jul-1994 12:46	341K
[   ]	28.Psalms1.mlxx	13-Jul-1994 11:21	752K
[   ]	29.Psalms2.mlxx	06-Aug-2015 16:10	750K
[   ]	30.Odes.mlxx	13-Jul-1994 11:53	180K
[   ]	31.Proverbs.mlxx	31-Mar-2015 09:11	490K
[   ]	32.Qoheleth.mlxx	13-Jul-1994 11:54	193K
[   ]	33.Canticles.mlxx	13-Jul-1994 11:54	87K
[   ]	34.Job.mlxx	13-Jul-1994 11:54	589K
[   ]	35.Wisdom.mlxx	13-Jul-1994 12:53	301K
[   ]	36.Sirach.mlxx	10-Aug-2015 16:04	815K
[   ]	37.PsSol.mlxx	13-Jul-1994 13:36	212K
[   ]	38.Hosea.mlxx	13-Jul-1994 13:37	170K
[   ]	39.Micah.mlxx	13-Jul-1994 13:37	102K
[   ]	40.Amos.mlxx	13-Jul-1994 13:38	138K
[   ]	41.Joel.mlxx	13-Jul-1994 13:38	68K
[   ]	42.Jonah.mlxx	13-Jul-1994 13:38	47K
[   ]	43.Obadiah.mlxx	13-Jul-1994 13:40	20K
[   ]	44.Nahum.mlxx	06-Aug-2015 15:41	40K
[   ]	45.Habakkuk.mlxx	13-Jul-1994 13:41	48K
[   ]	46.Zeph.mlxx	13-Jul-1994 13:41	53K
[   ]	47.Haggai.mlxx	13-Jul-1994 13:41	40K
[   ]	48.Zech.mlxx	13-Jul-1994 13:42	213K
[   ]	49.Malachi.mlxx	13-Jul-1994 13:42	61K
[   ]	50.Isaiah1.mlxx	04-Aug-2015 16:33	672K
[   ]	51.Isaiah2.mlxx	03-Aug-2015 15:22	485K
[   ]	52.Jer1.mlxx	03-Aug-2015 15:51	639K
[   ]	53.Jer2.mlxx	06-Aug-2015 15:56	600K
[   ]	54.Baruch.mlxx	13-Jul-1994 14:27	111K
[   ]	55.EpJer.mlxx	13-Jul-1994 14:29	56K
[   ]	56.Lam.mlxx	04-Aug-2015 16:30	105K
[   ]	57.Ezek1.mlxx	13-Jul-1994 15:04	601K
[   ]	58.Ezek2.mlxx	13-Jul-1994 15:04	663K
[   ]	59.BelOG.mlxx	19-Jul-1994 12:46	38K
[   ]	60.BelTh.mlxx	19-Jul-1994 12:46	37K
[   ]	61.DanielOG.mlxx	10-Aug-2015 16:14	460K
[   ]	62.DanielTh.mlxx	19-Jul-1994 12:46	446K
[   ]	63.SusOG.mlxx	19-Jul-1994 12:46	34K
[   ]	64.SusTh.mlxx	19-Jul-1994 12:47	49K
'''

# pasted from http://ccat.sas.upenn.edu/gopher/text/religion/biblical/parallel
paral_books = '''
[   ]	01.Genesis.par	08-Dec-1999 10:27	379K
[   ]	02.Exodus.par	05-Apr-1994 17:36	318K
[   ]	03.Lev.par	05-Apr-1994 17:36	225K
[   ]	04.Num.par	05-Apr-1994 17:36	299K
[   ]	05.Deut.par	05-Apr-1994 17:36	265K
[   ]	06.JoshB.par	05-Apr-1994 17:36	198K
[   ]	07.JoshA.par	05-Apr-1994 17:36	13K
[   ]	08.JudgesB.par	05-Apr-1994 17:36	178K
[   ]	09.JudgesA.par	02-Sep-1994 16:53	183K
[   ]	10.Ruth.par	05-Apr-1994 17:36	23K
[   ]	11.1Sam.par	05-Apr-1994 17:36	244K
[   ]	12.2Sam.par	05-Apr-1994 17:36	209K
[   ]	13.1Kings.par	05-Apr-1994 17:36	302K
[   ]	14.2Kings.par	05-Apr-1994 17:36	220K
[   ]	15.1Chron.par	05-Apr-1994 17:36	191K
[   ]	16.2Chron.par	18-Feb-2005 15:18	242K
[   ]	17.1Esdras.par	05-Apr-1994 17:36	161K
[   ]	18.Esther.par	05-Apr-1994 17:36	84K
[   ]	18.Ezra.par	05-Apr-1994 17:36	70K
[   ]	19.Neh.par	05-Apr-1994 17:36	94K
[   ]	20.Psalms.par	05-Apr-1994 17:36	533K
[   ]	22.Ps151.par	05-Apr-1994 17:36	1.8K
[   ]	23.Prov.par	05-Apr-1994 17:36	158K
[   ]	24.Qoh.par	05-Apr-1994 17:36	48K
[   ]	25.Cant.par	25-Mar-2015 11:34	24K
[   ]	26.Job.par	05-Apr-1994 17:36	183K
[   ]	27.Sirach.par	05-Apr-1994 17:36	289K
[   ]	28.Hosea.par	05-Apr-1994 17:36	46K
[   ]	29.Micah.par	05-Apr-1994 17:36	27K
[   ]	30.Amos.par	05-Apr-1994 17:36	37K
[   ]	31.Joel.par	05-Apr-1994 17:36	18K
[   ]	32.Jonah.par	05-Apr-1994 17:36	13K
[   ]	33.Obadiah.par	05-Apr-1994 17:36	5.4K
[   ]	34.Nahum.par	05-Apr-1994 17:36	11K
[   ]	35.Hab.par	05-Apr-1994 17:36	13K
[   ]	36.Zeph.par	05-Apr-1994 17:36	14K
[   ]	37.Haggai.par	05-Apr-1994 17:36	11K
[   ]	38.Zech.par	05-Apr-1994 17:36	57K
[   ]	39.Malachi.par	05-Apr-1994 17:36	17K
[   ]	40.Isaiah.par	05-Apr-1994 17:36	334K
[   ]	41.Jer.par	05-Apr-1994 17:36	461K
[   ]	42.Baruch.par	05-Apr-1994 17:36	16K
[   ]	43.Lam.par	05-Apr-1994 17:36	30K
[   ]	44.Ezekiel.par	05-Apr-1994 17:36	359K
[   ]	45.DanielOG.par	05-Apr-1994 17:36	177K
[   ]	46.DanielTh.par	05-Apr-1994 17:36	143K
'''

# clean the book names
morph_books = [book.split('\t')[1] for book in morph_books.split('\n')
                  if book]
paral_books = [book.split('\t')[1] for book in paral_books.split('\n')
                  if book]

# request the book data

if request_new_data:
    
    # make data directory
    if not os.path.isdir(source_directory):
        os.mkdir(source_directory)
    
    # big loop, grabs per data source for every book
    for directory, books, base_url in ((morph_dir, morph_books, morph_url), 
                                       (paral_dir, paral_books, paral_url)
                                      ):
        
        # make the directory for the data type
        if not os.path.isdir(directory):
            os.mkdir(directory)
        
        print(f'harvesting data from {base_url}')

        # get data per book
        for book in books:
            
            book_url = base_url.format(book) # format url
            book_file = os.path.join(directory, book) # name the book's data file
            
            # write to the book's data file
            with open(book_file, 'w') as outfile:
                print(f'\t|writing data for {book}...')
                outfile.write(requests.get(book_url).text) # harvest/save the data from website
                print(f'\t|\tfile written...')
                time.sleep(1) # be nice to the server and wait in between books
        print('done!')
else:
    print('Fetch new data toggled False. Doing nothing...')

Fetch new data toggled False. Doing nothing...


## Build TF Resource

Starting out the database will have have 4 objects:
    1. word
    2. book
    3. chapter
    4. verse
 
The following dicts are needed:
    1. otype
    2. oslots
    3. book
    4. chapter
    5. verse
    6. trans [transcription]
    7. morph [morphology]
 
The morph feature is plain text and tab separated—straight from the source.

### Prepare Book Names

In [3]:
# print out a copy/pastable link of file names to add to a dictionary

if False: # change to True for printout
    print(',\n'.join(['\'' + book + '\':' for book in morph_books]))

In [4]:
# map book names (some books are split up); filenames copied/pasted from above printout
book_names = {    
    '01.Gen.1.mlxx':'Genesis',
    '02.Gen.2.mlxx':'Genesis',
    '03.Exod.mlxx':'Exodus',
    '04.Lev.mlxx':'Leviticus',
    '05.Num.mlxx':'Numbers',
    '06.Deut.mlxx':'Deuteronomy',
    '07.JoshB.mlxx':'Joshua_B',
    '08.JoshA.mlxx':'Joshua_A',
    '09.JudgesB.mlxx':'Judges_B',
    '10.JudgesA.mlxx':'Judges_A',
    '11.Ruth.mlxx':'Ruth',
    '12.1Sam.mlxx':'1_Samuel',
    '13.2Sam.mlxx':'2_Samuel',
    '14.1Kings.mlxx':'1_Kings',
    '15.2Kings.mlxx':'2_Kings',
    '16.1Chron.mlxx':'1_Chronicles',
    '17.2Chron.mlxx':'2_Chronicles',
    '18.1Esdras.mlxx':'1_Esdras',
    '19.2Esdras.mlxx':'2_Esdras',
    '20.Esther.mlxx':'Esther',
    '21.Judith.mlxx':'Judith',
    '22.TobitBA.mlxx':'Tobit_BA',
    '23.TobitS.mlxx':'Tobit_S',
    '24.1Macc.mlxx':'1_Maccabees',
    '25.2Macc.mlxx':'2_Maccabees',
    '26.3Macc.mlxx':'3_Maccabees',
    '27.4Macc.mlxx':'4_Maccabees',
    '28.Psalms1.mlxx':'Psalms',
    '29.Psalms2.mlxx':'Psalms',
    '30.Odes.mlxx':'Odes',
    '31.Proverbs.mlxx':'Proverbs',
    '32.Qoheleth.mlxx':'Qoheleth',
    '33.Canticles.mlxx':'Canticles',
    '34.Job.mlxx':'Job',
    '35.Wisdom.mlxx':'Wisdom',
    '36.Sirach.mlxx':'Sirach',
    '37.PsSol.mlxx':'Psalms_of_Solomon',
    '38.Hosea.mlxx':'Hosea',
    '39.Micah.mlxx':'Micah',
    '40.Amos.mlxx':'Amos',
    '41.Joel.mlxx':'Joel',
    '42.Jonah.mlxx':'Jonah',
    '43.Obadiah.mlxx':'Obadiah',
    '44.Nahum.mlxx':'Nahum',
    '45.Habakkuk.mlxx':'Habakkuk',
    '46.Zeph.mlxx':'Zephaniah',
    '47.Haggai.mlxx':'Haggai',
    '48.Zech.mlxx':'Zechariah',
    '49.Malachi.mlxx':'Malachi',
    '50.Isaiah1.mlxx':'Isaiah',
    '51.Isaiah2.mlxx':'Isaiah',
    '52.Jer1.mlxx':'Jeremiah',
    '53.Jer2.mlxx':'Jeremiah',
    '54.Baruch.mlxx':'Baruch',
    '55.EpJer.mlxx':'Epistle_of_Jeremiah',
    '56.Lam.mlxx':'Lamentations',
    '57.Ezek1.mlxx':'Ezekiel',
    '58.Ezek2.mlxx':'Ezekiel',
    '59.BelOG.mlxx':'Bel_and_Dragon_OG',
    '60.BelTh.mlxx':'Bel_and_Dragon_Th',
    '61.DanielOG.mlxx':'Daniel_OG',
    '62.DanielTh.mlxx':'Daniel_Th',
    '63.SusOG.mlxx':'Susanna_OG',
    '64.SusTh.mlxx':'Susanna_Th' 
}

### Count and Map Slots

In [5]:
nodes = collections.defaultdict(dict) # mapping from node to feature
books = collections.defaultdict(list) # mapping from book to its slots
chapters = collections.defaultdict(list) # chapter 2 slots
verses = collections.defaultdict(list) # verse 2 slots

# creat slots
slot = 1
for morph_book in morph_books:
    
    data_file = os.path.join(morph_dir, morph_book)
    
    print(f'processing slots for {morph_book}')
    
    with open(data_file, 'r') as book_data:
        
        this_book = book_names[morph_book]
        
        for line in book_data:
            
            data = line.strip().split()
            
            # length of 0/1 is either blank line or section marker with no chapter/verse label
            if len(data) == 1 and data[0] == '':
                continue
            # exception for some superscriptions or in-doubt texts w/out chapter:verse label
            elif len(data) == 1 and data[0] != '': 
                data.append('0:0') # place-holder chapter:verse
            
            # length of 2 is a verse marker
            if len(data) == 2:
                
                # format chapter for single chapter books, e.g. Obadiah
                if ':' not in data[1]:
                    data[1] = '1:' + data[1]
                
                # assign chapter/verse
                this_chapter, this_verse = data[1].split(':')
                
            # length > 2 is a slot
            elif len(data) > 2:
                
                # get slot data
                trans = data[0]
                morph = '.'.join(data[1:]) # morpho data into dot-separated string, disambiguate later
                
                # save slot data
                nodes['otype'][slot] = 'word'
                nodes['trans'][slot] = trans
                nodes['morph'][slot] = morph
                nodes['trailer'][slot] = ' '# simple whitespace trailer 
                books[this_book].append(slot)
                chapters[(this_book, this_chapter)].append(slot)
                verses[(this_book, this_chapter, this_verse)].append(slot)
                
                # up the slot by 1
                slot += 1
                
        print(f'\t|{len(books[this_book])} slots logged')
        
print('done...')
print(f'  {slot-1} slots created...')
print(f'  {len(books)} books created...')
print(f'  {len(chapters)} chapters created...')
print(f'  {len(verses)} verses created...')

processing slots for 01.Gen.1.mlxx
	|16774 slots logged
processing slots for 02.Gen.2.mlxx
	|32566 slots logged
processing slots for 03.Exod.mlxx
	|24816 slots logged
processing slots for 04.Lev.mlxx
	|19082 slots logged
processing slots for 05.Num.mlxx
	|25059 slots logged
processing slots for 06.Deut.mlxx
	|22990 slots logged
processing slots for 07.JoshB.mlxx
	|14896 slots logged
processing slots for 08.JoshA.mlxx
	|1064 slots logged
processing slots for 09.JudgesB.mlxx
	|15580 slots logged
processing slots for 10.JudgesA.mlxx
	|15947 slots logged
processing slots for 11.Ruth.mlxx
	|2072 slots logged
processing slots for 12.1Sam.mlxx
	|20131 slots logged
processing slots for 13.2Sam.mlxx
	|17927 slots logged
processing slots for 14.1Kings.mlxx
	|20803 slots logged
processing slots for 15.2Kings.mlxx
	|18853 slots logged
processing slots for 16.1Chron.mlxx
	|16244 slots logged
processing slots for 17.2Chron.mlxx
	|21353 slots logged
processing slots for 18.1Esdras.mlxx
	|8994 slots l

### Correction for Slot 99039

Slot 99039 contains a very long word: "SUGKATAKLHRONOMHQH/SONTAI". This word is so long that there is no space between it and the type tag. The `split()` above thus mistakenly combines the surface form of the word with the type code. Based on a search of word length, this appears to be the only word with this problem. We will consider this a problem of conversion rather than a problem with the data source itself. Thus, instead of introducing an edit in the enrichments notebook, we make the change here.

In [6]:
nodes['trans'][99039] = nodes['trans'][99039][:25] # cut off the tag
nodes['morph'][99039] = 'VC.' + nodes['morph'][99039]

In [7]:
print(nodes['trans'][99039])
print(nodes['morph'][99039])

SUGKATAKLHRONOMHQH/SONTAI
VC.APS2S.KLHRONOME/W.SUN.KATA


### Count and Map Nodes

Objects of slots are mapped to the oslot feature.

The oslot feature is an edge feature with node as key and list/set as value. 

In [8]:
edges = {'oslots':{}}

# order book, chapter, verse by minimum slot number
ordered_book =  [book[1] for book in sorted((books[bk][0], bk) for bk in books)]
ordered_chap = [chap[1] for chap in sorted((chapters[ch][0], ch) for ch in chapters)]
ordered_vers = [vers[1] for vers in sorted((verses[vs][0], vs) for vs in verses)]

node = slot - 1 # slot is upped below

# create book data
for book in ordered_book:
    node += 1
    nodes['otype'][node] = 'book'
    nodes['book'][node] = book
    nodes['book@en'][node] = book
    edges['oslots'][node] = books[book]

# create chapter data
for book, chapter in ordered_chap:
    node += 1
    nodes['otype'][node] = 'chapter'
    nodes['chapter'][node] = str(chapter)
    edges['oslots'][node] = chapters[(book, chapter)]
    
# create verse data
for book, chapter, verse in ordered_vers:
    node += 1
    nodes['otype'][node] = 'verse'
    nodes['verse'][node] = str(verse)
    edges['oslots'][node] = verses[(book, chapter, verse)]

    
print(node-(slot-1), 'nodes created...')

31862 nodes created...


### Format Metadata and Export

In [9]:
metadata = {
    
    '': {            
            'created_by':'R. Kraft et al., CCAT, University of Pennsylvania',
            'converted_by':'Cody Kingham',
            'source':'http://ccat.sas.upenn.edu/rak//catss.html',
            'license':'http://ccat.sas.upenn.edu/gopher/text/religion/biblical/lxxmorph/0-user-declaration.txt',
        }
            ,
    'otext': {
        'sectionFeatures': 'book,chapter,verse',
        'sectionTypes': 'book,chapter,verse',
        'fmt:text-orig-plain': '{trans}{trailer}',
    },
    'book@en': {
        'valueType': 'str',
        'language': 'English',
        'languageCode': 'en',
        'languageEnglish': 'english',
    },
}

# add feature metadata
integers = {}

for nf in nodes:
    metadata[nf] = {'valueType': 'str' if nf not in integers else 'int'}
for ef in edges:
    metadata[ef] = {'valueType': 'str' if ef not in integers else 'int'}

In [10]:
# bring in TF engine for export
TF = Fabric(tf_directory)

This is Text-Fabric 3.1.1
Api reference : https://github.com/Dans-labs/text-fabric/wiki/Api
Tutorial      : https://github.com/Dans-labs/text-fabric/blob/master/docs/tutorial.ipynb
Example data  : https://github.com/Dans-labs/text-fabric-data

0 features found and 0 ignored


  0.00s Grid feature "otype" not found in

  0.01s Grid feature "oslots" not found in



  0.01s Grid feature "otext" not found. Working without Text-API



In [11]:
TF.save(nodeFeatures=nodes, edgeFeatures=edges, metaData=metadata)

  0.00s Exporting 8 node and 1 edge and 1 config features to tf:
   |     0.00s T book                 to tf
   |     0.00s T book@en              to tf
   |     0.01s T chapter              to tf
   |     0.87s T morph                to tf
   |     0.20s T otype                to tf
   |     0.73s T trailer              to tf
   |     0.80s T trans                to tf
   |     0.05s T verse                to tf
   |     0.27s T oslots               to tf
   |     0.00s M otext                to tf
  2.94s Exported 8 node features and 1 edge features and 1 config features to tf


## Test Resulting Data

In [12]:
TF = Fabric(locations=['tf'], modules=[''])

This is Text-Fabric 3.1.1
Api reference : https://github.com/Dans-labs/text-fabric/wiki/Api
Tutorial      : https://github.com/Dans-labs/text-fabric/blob/master/docs/tutorial.ipynb
Example data  : https://github.com/Dans-labs/text-fabric-data

10 features found and 0 ignored


In [13]:
api = TF.load('''
                book chapter verse
                trans morph
              '''
             )

api.makeAvailableIn(globals())

  0.00s loading features ...
   |     0.22s T otype                from tf
   |     1.22s T oslots               from tf
   |     0.01s T book                 from tf
   |     0.01s T chapter              from tf
   |     0.10s T verse                from tf
   |     1.45s T trailer              from tf
   |     1.83s T trans                from tf
   |      |     0.03s C __levels__           from otype, oslots
   |      |     2.56s C __order__            from otype, oslots, __levels__
   |      |     0.27s C __rank__             from otype, __order__
   |      |     3.61s C __levUp__            from otype, oslots, __rank__
   |      |     0.08s C __levDown__          from otype, __levUp__, __rank__
   |      |     1.12s C __boundary__         from otype, oslots, __rank__
   |     0.00s M otext                from tf
   |      |     0.11s C __sections__         from otype, oslots, otext, __levUp__, __levels__, book, chapter, verse
   |     1.65s T morph                from tf
   |     

### Sample the Books that Are There

In [14]:
for book in F.otype.s('book'):
    
    print(F.book.v(book))

Genesis
Exodus
Leviticus
Numbers
Deuteronomy
Joshua_B
Joshua_A
Judges_B
Judges_A
Ruth
1_Samuel
2_Samuel
1_Kings
2_Kings
1_Chronicles
2_Chronicles
1_Esdras
2_Esdras
Esther
Judith
Tobit_BA
Tobit_S
1_Maccabees
2_Maccabees
3_Maccabees
4_Maccabees
Psalms
Odes
Proverbs
Qoheleth
Canticles
Job
Wisdom
Sirach
Psalms_of_Solomon
Hosea
Micah
Amos
Joel
Jonah
Obadiah
Nahum
Habakkuk
Zephaniah
Haggai
Zechariah
Malachi
Isaiah
Jeremiah
Baruch
Epistle_of_Jeremiah
Lamentations
Ezekiel
Bel_and_Dragon_OG
Bel_and_Dragon_Th
Daniel_OG
Daniel_Th
Susanna_OG
Susanna_Th


### Sample the Plain Text Data

In [15]:
gen_1 = T.nodeFromSection(('Genesis', '1'))

for verse in L.d(gen_1, otype='verse'):
    
    words = L.d(verse, otype='word')
    
    print()
    print(F.verse.v(verse))
    print(T.text(words, fmt='text-orig-plain'))


1
E)N A)RXH=| E)POI/HSEN O( QEO\S TO\N OU)RANO\N KAI\ TH\N GH=N 

2
H( DE\ GH= H)=N A)O/RATOS KAI\ A)KATASKEU/ASTOS KAI\ SKO/TOS E)PA/NW TH=S A)BU/SSOU KAI\ PNEU=MA QEOU= E)PEFE/RETO E)PA/NW TOU= U(/DATOS 

3
KAI\ EI)=PEN O( QEO/S GENHQH/TW FW=S KAI\ E)GE/NETO FW=S 

4
KAI\ EI)=DEN O( QEO\S TO\ FW=S O(/TI KALO/N KAI\ DIEXW/RISEN O( QEO\S A)NA\ ME/SON TOU= FWTO\S KAI\ A)NA\ ME/SON TOU= SKO/TOUS 

5
KAI\ E)KA/LESEN O( QEO\S TO\ FW=S H(ME/RAN KAI\ TO\ SKO/TOS E)KA/LESEN NU/KTA KAI\ E)GE/NETO E(SPE/RA KAI\ E)GE/NETO PRWI/ H(ME/RA MI/A 

6
KAI\ EI)=PEN O( QEO/S GENHQH/TW STERE/WMA E)N ME/SW| TOU= U(/DATOS KAI\ E)/STW DIAXWRI/ZON A)NA\ ME/SON U(/DATOS KAI\ U(/DATOS KAI\ E)GE/NETO OU(/TWS 

7
KAI\ E)POI/HSEN O( QEO\S TO\ STERE/WMA KAI\ DIEXW/RISEN O( QEO\S A)NA\ ME/SON TOU= U(/DATOS O(\ H)=N U(POKA/TW TOU= STEREW/MATOS KAI\ A)NA\ ME/SON TOU= U(/DATOS TOU= E)PA/NW TOU= STEREW/MATOS 

8
KAI\ E)KA/LESEN O( QEO\S TO\ STERE/WMA OU)RANO/N KAI\ EI)=DEN O( QEO\S O(/TI KALO/N KAI\ E)GE/NETO E(SPE/RA 

### Sample the Morphology Data

In [16]:
for word in L.d(T.nodeFromSection(('Psalms', '1', '1')), otype='word'):
    
    print(F.trans.v(word), ' — ', F.morph.v(word))

MAKA/RIOS  —  A1A.NSM.MAKA/RIOS
A)NH/R  —  N3.NSM.A)NH/R
O(\S  —  RR.NSM.O(/S
OU)K  —  D.OU)
E)POREU/QH  —  VCI.API3S.POREU/OMAI
E)N  —  P.E)N
BOULH=|  —  N1.DSF.BOULH/
A)SEBW=N  —  A3H.GPM.A)SEBH/S
KAI\  —  C.KAI/
E)N  —  P.E)N
O(DW=|  —  N2.DSF.O(DO/S
A(MARTWLW=N  —  A1B.GPM.A(MARTWLO/S
OU)K  —  D.OU)
E)/STH  —  VHI.AAI3S.I(/STHMI
KAI\  —  C.KAI/
E)PI\  —  P.E)PI/
KAQE/DRAN  —  N1A.ASF.KAQE/DRA
LOIMW=N  —  N2.GPM.LOIMO/S
OU)K  —  D.OU)
E)KA/QISEN  —  VAI.AAI3S.I(/ZW.KATA
