<p style="text-align:center;font-size:30px;font-weight:bold">Clause Syntax in the Song of Songs:<br><br> A Preliminary Study</p>
<br>
<br>
# Song of Songs Word Order Statistics (verbal)
<strong>Purpose of this notebook:</strong>
<br>
<br>
This search produces word order statistics for verbal clauses.
<br>
<br>
For the purpose of this study, "mainline" clauses are those which are not adverbial or adjectival. Clauses such as vocatives, ellipses, casus pendens, and defective are excluded. Also, clauses stored as "AjCl" in ETCBC are considered nominal clauses for the purpose here.
<br>
<br>
In this notebook, statistics are printed to the console. However, simple modifications to the search could write results to a CSV, txt file, or HTML display.

In [1]:
import sys
import collections

from laf.fabric import LafFabric
from etcbc.preprocess import prepare
fabric = LafFabric(verbose = '')

  0.00s This is LAF-Fabric 4.5.21
API reference: http://laf-fabric.readthedocs.org/en/latest/texts/API-reference.html
Feature doc: https://shebanq.ancient-data.org/static/docs/featuredoc/texts/welcome.html



In [2]:
API = fabric.load('etcbc4b','--','word_order_verbs',

                  {'primary': False,
                   'xmlids':{'node':False,'edge':False},
                   'features':('book chapter verse otype code typ txt monads',''),
                   'prepare': prepare
        
                    }
)
exec(fabric.localnames.format(var='fabric'))

  0.00s LOADING API: please wait ... 
  0.01s USING main  DATA COMPILED AT: 2015-11-02T15-08-56
  3.69s LOGFILE=/Users/Cody/laf-fabric-output/etcbc4b/word_order_verbs/__log__word_order_verbs.txt
  3.69s INFO: LOADING PREPARED data: please wait ... 
  3.69s prep prep: G.node_sort
  3.82s prep prep: G.node_sort_inv
  4.34s prep prep: L.node_up
  7.60s prep prep: L.node_down
    13s prep prep: V.verses
    13s prep prep: V.books_la
    13s ETCBC reference: http://laf-fabric.readthedocs.org/en/latest/texts/ETCBC-reference.html
    15s INFO: LOADED PREPARED data
    15s INFO: DATA LOADED FROM SOURCE etcbc4b AND ANNOX lexicon FOR TASK word_order_verbs AT 2016-05-11T17-30-32


In [3]:
import re

In [4]:
#indexes all nodes in corpus for faster searches

corpus = ["Canticum"]

cur_book = None
nodes = []

for n in NN():
    if cur_book in corpus:
        nodes.append(n)
    
    if F.otype.v(n) == 'book':
        cur_book = F.book.v(n)
        
msg('{} nodes appended'.format(len(nodes)))

    16s 6020 nodes appended


In [5]:
def get_percent(total,freq):
    return str(round(float(freq / total)*100,2))+'%'

# All Clauses in Song

In [6]:
Song_Clauses = []

clause_filter = ['AjCl', 'CPen', 'Defc', 'Ellp', 'InfA', 'InfC', 
                 'MSyn', 'NmCl', 'Ptcp', 'Reop','Unkn', 'Voct']

for node in nodes:
    otype = F.otype.v(node)
    
    if otype == "clause" and F.typ.v(node) not in clause_filter:
        Song_Clauses.append(F.typ.v(node))
        
msg(str(len(Song_Clauses))+" loaded")


    17s 211 loaded


In [7]:
fronted = []
fronted_subject = []

total = 0
fronted_total = 0
fronted_sub_total = 0

for clause in Song_Clauses:
    total += 1
    tester = re.findall('X.|x.',clause,0)
    if len(tester) != 0:
        fronted.append(clause)
        fronted_total += 1
msg(str(len(fronted))+" loaded")

for clause in fronted:
    tester = re.findall('X.',clause,0)
    if len(tester) != 0:
        fronted_subject.append(clause)
        fronted_sub_total += 1

    19s 103 loaded


In [8]:
print ('total: {}'.format(fronted_sub_total+fronted_total))
print ('fronted: {} ({})'.format(fronted_total, get_percent(total,fronted_total)))
print ('fr_Subj: {} ({})'.format(fronted_sub_total, get_percent(total,fronted_sub_total)))

total: 130
fronted: 103 (48.82%)
fr_Subj: 27 (12.8%)


# Main Clauses

In [9]:
def code_filter(node):
    
    code = int(F.code.v(node))
    clause_filter = ['AjCl', 'CPen', 'Defc', 'Ellp', 'InfA', 'InfC', 
                 'MSyn', 'NmCl', 'Ptcp', 'Reop','Unkn', 'Voct']
    
    if F.typ.v(node) in clause_filter:
        return False
    
    #asyndetic
    elif 100 <= code <= 167:
        return True
    
    #parallel
    elif 200 <= code <= 201:
        return True
    
    #asyndetic with conj.
    elif 300 <= code <= 367:
        return True
        
    #syndetic
    elif 400 <= code <= 487:
        return True
    
    #first cl in direct speech
    elif code == 999:
        return True

In [10]:
Song_Clauses = []

for node in nodes:
    otype = F.otype.v(node)
    
    if otype == "clause_atom" and code_filter(node)==True:
        Song_Clauses.append(F.typ.v(node))
        
msg(str(len(Song_Clauses))+" loaded")


    21s 176 loaded


In [11]:
fronted = []
fronted_subject = []
total = 0
fronted_total = 0
fronted_sub_total = 0

for clause in Song_Clauses:
    total += 1
    tester = re.findall('X.|x.',clause,0)
    if len(tester) != 0:
        fronted.append(clause)
        fronted_total += 1
msg(str(len(fronted))+" loaded")

for clause in fronted:
    tester = re.findall('X.',clause,0)
    if len(tester) != 0:
        fronted_subject.append(clause)
        fronted_sub_total += 1

    22s 68 loaded


In [12]:
print ('total: {}'.format(fronted_sub_total+fronted_total))
print ('fronted: {} ({})'.format(fronted_total, get_percent(total,fronted_total)))
print ('fr_Subj: {} ({})'.format(fronted_sub_total, get_percent(total,fronted_sub_total)))

total: 92
fronted: 68 (38.64%)
fr_Subj: 24 (13.64%)


# Only Clauses with Subjects (out of all)

In [13]:
Song_Clauses = []

clause_filter = ['AjCl', 'CPen', 'Defc', 'Ellp', 'InfA', 'InfC', 
                 'MSyn', 'NmCl', 'Ptcp', 'Reop','Unkn', 'Voct']

for node in nodes:
    otype = F.otype.v(node)
    
    if otype == "clause" and F.typ.v(node) not in clause_filter:
        Song_Clauses.append(F.typ.v(node))
        
msg(str(len(Song_Clauses))+" loaded")


    24s 211 loaded


In [14]:
fronted = []
fronted_subject = []
total = 0
fronted_total = 0
fronted_sub_total = 0

for clause in Song_Clauses:
    total += 1
    tester = re.findall('X',clause,0)
    if len(tester) != 0:
        fronted.append(clause)
        fronted_total += 1
msg(str(len(fronted))+" loaded")

for clause in fronted:
    tester = re.findall('X.',clause,0)
    if len(tester) != 0:
        fronted_subject.append(clause)
        fronted_sub_total += 1

    25s 65 loaded


In [15]:
print ('total: {}'.format(fronted_sub_total+fronted_total))
print ('fronted: {} ({} of total)'.format(fronted_total, get_percent(total,fronted_total)))
print ('fr_Subj: {} ({} of fronted)'.format(fronted_sub_total, get_percent(total,fronted_sub_total)))

total: 92
fronted: 65 (30.81% of total)
fr_Subj: 27 (12.8% of fronted)


# Only Clauses with Subjects (main only)

In [16]:
Song_Clauses = []

for node in nodes:
    otype = F.otype.v(node)
    
    if otype == "clause_atom" and code_filter(node)==True:
        Song_Clauses.append(F.typ.v(node))
        
msg(str(len(Song_Clauses))+" loaded")


    27s 176 loaded


In [17]:
subjects = []
fronted_subjects = []

total = 0
subjects_total = 0
fronted_sub_total = 0

for clause in Song_Clauses:
    total += 1
    tester = re.findall('X',clause,0)
    if len(tester) != 0:
        subjects.append(clause)
        subjects_total += 1
msg(str(len(subjects))+" loaded")

for clause in subjects:
    tester = re.findall('X.',clause,0)
    if len(tester) != 0:
        fronted_subjects.append(clause)
        fronted_sub_total += 1

    27s 50 loaded


In [18]:
print ('total: {}'.format(fronted_sub_total+subjects_total))
print ('subjects: {} ({} of total)'.format(subjects_total, get_percent(total,subjects_total)))
print ('fr_Subj: {} ({} of subjects)'.format(fronted_sub_total, get_percent(total,fronted_sub_total)))

total: 74
subjects: 50 (28.41% of total)
fr_Subj: 24 (13.64% of subjects)


# Ancient Poetry Statistics (main, all)

In [19]:
def get_ref(ref_type,node):
    if ref_type == 'verse':
        verse = L.u('verse',node)
        return F.verse.v(verse)
    elif ref_type == 'chapter':
        chapter = L.u('chapter',node)
        return F.chapter.v(chapter)
    elif ref_type == 'book':
        book = L.u('book',node)
        return F.book.v(book)

corpus = {'Genesis_49':[str(x) for x in range (1,28)],
          'Exodus_15':[str(x) for x in range (1,22)],
          'Numeri_23':[str(x) for x in range (7,11)]+[str(x) for x in range (18,25)],
          'Numeri_24':[str(x) for x in range (3,10)]+[str(x) for x in range (15,25)],
          'Deuteronomium_32':[str(x) for x in range (1,44)],
          'Deuteronomium_33':[str(x) for x in range (2,30)],
          'Judices_5':[str(x) for x in range (2,32)]
         }

cur_book = None
nodes = []

for n in NN():
    chapter = get_ref('chapter',n)
    verse = get_ref('verse',n)
    if ('{}_{}'.format(cur_book,chapter) in corpus) and verse in corpus['{}_{}'.format(cur_book,chapter)]:
        nodes.append(n)
    
    if F.otype.v(n) == 'book':
        cur_book = F.book.v(n)
        
msg('{} nodes appended'.format(len(nodes)))

    37s 9523 nodes appended


In [20]:
poetry_clauses = []

for node in nodes:
    otype = F.otype.v(node)
    
    if otype == "clause_atom" and code_filter(node)==True:
        poetry_clauses.append(F.typ.v(node))
        
msg(str(len(poetry_clauses))+" loaded")


    37s 398 loaded


In [21]:
fronted = []
fronted_subject = []

total = 0
fronted_total = 0
fronted_sub_total = 0

for clause in poetry_clauses:
    total += 1
    tester = re.findall('X.|x.',clause,0)
    if len(tester) != 0:
        fronted.append(clause)
        fronted_total += 1
msg(str(len(fronted))+" loaded")

for clause in fronted:
    tester = re.findall('X.',clause,0)
    if len(tester) != 0:
        fronted_subject.append(clause)
        fronted_sub_total += 1

    37s 154 loaded


In [22]:
print ('total: {}'.format(total))
print ('fronted: {} ({} of total)'.format(fronted_total, get_percent(total,fronted_total)))
print ('fr_Subj: {} ({} of fronted)'.format(fronted_sub_total, get_percent(fronted_total,fronted_sub_total)))

total: 398
fronted: 154 (38.69% of total)
fr_Subj: 46 (29.87% of fronted)


# Ancient Poetry (main; subjects only)

In [23]:
subjects = []
fronted_subjects = []

total = 0
subjects_total = 0
fronted_sub_total = 0

for clause in poetry_clauses:
    total += 1
    tester = re.findall('X',clause,0)
    if len(tester) != 0:
        subjects.append(clause)
        subjects_total += 1
msg(str(len(subjects))+" loaded")

for clause in subjects:
    tester = re.findall('X.',clause,0)
    if len(tester) != 0:
        fronted_subjects.append(clause)
        fronted_sub_total += 1

    41s 122 loaded


In [24]:
print ('total: {}'.format(total))
print ('subjects: {} ({} of total)'.format(subjects_total, get_percent(total,subjects_total)))
print ('fr_Subj: {} ({} of subjects)'.format(fronted_sub_total, get_percent(subjects_total,fronted_sub_total)))

total: 398
subjects: 122 (30.65% of total)
fr_Subj: 46 (37.7% of subjects)


# Psalms (main; subject only)

In [25]:
#indexes all nodes in corpus for faster searches

corpus = ["Psalmi"]

cur_book = None
nodes = []

for n in NN():
    if cur_book in corpus:
        nodes.append(n)
    
    if F.otype.v(n) == 'book':
        cur_book = F.book.v(n)
        
msg('{} nodes appended'.format(len(nodes)))

    47s 99028 nodes appended


In [26]:
psalm_clauses = []

for node in nodes:
    otype = F.otype.v(node)
    
    if otype == "clause_atom" and code_filter(node)==True:
        psalm_clauses.append(F.typ.v(node))
        
msg(str(len(psalm_clauses))+" loaded")

    47s 4017 loaded


In [27]:
subjects = []
fronted_subjects = []

total = 0
subjects_total = 0
fronted_sub_total = 0

for clause in psalm_clauses:
    total += 1
    tester = re.findall('X',clause,0)
    if len(tester) != 0:
        subjects.append(clause)
        subjects_total += 1
msg(str(len(subjects))+" loaded")

for clause in subjects:
    tester = re.findall('X.',clause,0)
    if len(tester) != 0:
        fronted_subjects.append(clause)
        fronted_sub_total += 1

    48s 1064 loaded


In [28]:
print ('total: {}'.format(total))
print ('subjects: {} ({} of total)'.format(subjects_total, get_percent(total,subjects_total)))
print ('fr_Subj: {} ({} of subjects)'.format(fronted_sub_total, get_percent(subjects_total,fronted_sub_total)))

total: 4017
subjects: 1064 (26.49% of total)
fr_Subj: 486 (45.68% of subjects)
