<p style="text-align:center;font-size:30px;font-weight:bold">Clause Syntax in the Song of Songs:<br><br> A Preliminary Study</p>
<br>
<br>
# Song of Songs Clause Relationships
<strong>Purpose of this notebook:</strong>
<br>
<br>
This notebook produces statistics for mother-daughter clause relations in the Song of Songs.
<br>
<br>
For the purposes of this study, "mainline" clauses are those that are neither adverbial nor adjectival. Clauses such as vocatives, ellipses, casus pendens, and defective clauses are excluded. In addition, clauses stored as "AjCl" in ETCBC are treated as nominal clauses here.
<br>
<br>
In this notebook, statistics are mostly printed to the console (one cell also writes a summary table to <code>Song_cl_rela.csv</code>). Simple modifications to the searches could write the other results to a CSV file, a text file, or an HTML display.

In [14]:
# Standard library (note: `sys` is not used in the visible cells).
import sys
import collections

# LAF-Fabric pieces; `prepare` is handed to fabric.load() in the next cell.
from laf.fabric import LafFabric
from etcbc.preprocess import prepare
# Processor object: the data is loaded through it and fabric.localnames is read from it below.
fabric = LafFabric(verbose = '')

  0.00s This is LAF-Fabric 4.5.21
API reference: http://laf-fabric.readthedocs.org/en/latest/texts/API-reference.html
Feature doc: https://shebanq.ancient-data.org/static/docs/featuredoc/texts/welcome.html



In [15]:
# Load the 'etcbc4b' data for the task 'song_cl_rela'.
# The 'features' entry names the features requested for this task; presumably the
# first string lists node features (read later as F.<name>.v(n)) and the second
# lists edge features (read later as C.mother.v(n)) -- TODO confirm against the
# LAF-Fabric API reference.
API = fabric.load('etcbc4b','--','song_cl_rela',

                  {'primary': False,
                   'xmlids':{'node':False,'edge':False},
                   'features':('book chapter verse otype code typ','mother'),
                   'prepare': prepare,
                    }
)
# Presumably this is what brings the API names used by later cells (F, C, NN, msg)
# into the notebook namespace: they are not defined anywhere else in the visible cells.
exec(fabric.localnames.format(var='fabric'))

  0.00s LOADING API: please wait ... 
  0.02s USING main  DATA COMPILED AT: 2015-11-02T15-08-56
  4.36s LOGFILE=/Users/Cody/laf-fabric-output/etcbc4b/song_cl_rela/__log__song_cl_rela.txt
  4.36s INFO: LOADING PREPARED data: please wait ... 
  4.36s prep prep: G.node_sort
  4.56s prep prep: G.node_sort_inv
  5.42s prep prep: L.node_up
  9.75s prep prep: L.node_down
    19s prep prep: V.verses
    19s prep prep: V.books_la
    19s ETCBC reference: http://laf-fabric.readthedocs.org/en/latest/texts/ETCBC-reference.html
    23s INFO: LOADED PREPARED data
    23s INFO: DATA LOADED FROM SOURCE etcbc4b AND ANNOX lexicon FOR TASK song_cl_rela AT 2016-05-11T17-26-39


In [16]:
#indexes all nodes in Song of Songs for faster searches
#(later cells loop over this short list instead of walking the whole corpus again)

corpus = 'Canticum'
cur_book = None
nodes = []

for n in NN():

    # Update the current book *before* testing membership. Like the original loop,
    # this relies on a book node being yielded ahead of the nodes it contains.
    # Testing first (as the original did) skipped the Song's own book node and
    # appended the book node of whichever book follows it.
    if F.otype.v(n) == 'book':
        cur_book = F.book.v(n)

    if cur_book == corpus:
        nodes.append(n)

msg('{} nodes appended'.format(len(nodes)))

    12s 6020 nodes appended


In [17]:
#provides a function that selects only the mainline clause types
#for this search, mainline is defined as asyndetic, parallel, coordinate with w or )w, and direct speech (999)

import re

def code_filter(code):
    """Tell whether a clause-atom relation code counts as "mainline" here.

    Parameters
    ----------
    code : int or str
        The relation code; it is converted with ``int()``, so a numeric
        string is accepted as well.

    Returns
    -------
    bool
        True when the code lies in one of the accepted ranges, otherwise
        False (the earlier version fell off the end and returned None).

    Raises
    ------
    ValueError
        If ``code`` cannot be converted to an integer.
    """
    code = int(code)

    # Inclusive (low, high) ranges; the labels are the ones used in this notebook.
    mainline_ranges = (
        (100, 167),   # asyndetic
        (200, 201),   # parallel
        (300, 367),   # "asyndetic with conj."
        (400, 487),   # syndetic
        (999, 999),   # first clause in direct speech
    )

    return any(low <= code <= high for low, high in mainline_ranges)
    
def get_percent(total,freq):
    """Format ``freq`` as a percentage of ``total``.

    The share is rounded to two decimal places and returned as a string
    ending in '%', e.g. ``get_percent(4, 1)`` gives ``'25.0%'``.
    """
    share = float(freq / total)
    rounded = round(share * 100, 2)
    return '{}%'.format(rounded)

# Search with Less-Simplified Results

In [18]:
#takes in 'mother>daughter' format and simplifies the clause type
def cl_simplifier(x):
    """Collapse the clause types in a 'mother>daughter' label to broad classes.

    Each side of the label is tested against a small set of patterns and,
    when one matches, replaced by the corresponding class name:

    ======  =========================================================
    class   a side is mapped to it when it contains
    ======  =========================================================
    qtl     'Q'
    yqtl    'Y'
    impv    'I' (but not the 'Inf' of infinitive labels such as InfC)
    way     'Way'
    NmCl    'AjCl'
    ======  =========================================================

    A side that matches nothing is kept as it is; when neither side
    matches, ``x`` is returned unchanged.

    Parameters
    ----------
    x : str
        Label of the form 'mother>daughter', e.g. 'ZQt0>ZYq0'.

    Returns
    -------
    str
        The simplified label, e.g. 'qtl>yqtl'.

    Raises
    ------
    IndexError
        If ``x`` contains no '>' separator.
    """
    # (class name, regex) pairs, in the order the original checked them.
    # The imperative pattern excludes 'Inf...' so that infinitive clause
    # labels are not counted as imperatives.
    patterns = (
        ('qtl', 'Q'),
        ('yqtl', 'Y'),
        ('impv', 'I(?!nf)'),
        ('way', 'Way'),
        ('NmCl', 'AjCl'),
    )

    def simplify(label):
        """Return the class name for one side, or None when nothing matches."""
        matched = None
        for name, pattern in patterns:
            if re.search(pattern, label):
                # no break: a later pattern overrides an earlier one
                matched = name
        return matched

    parts = x.split('>')
    mother_raw, daughter_raw = parts[0], parts[1]

    mother = simplify(mother_raw)
    daughter = simplify(daughter_raw)

    if mother is None and daughter is None:
        return x

    return '{}>{}'.format(
        mother_raw if mother is None else mother,
        daughter_raw if daughter is None else daughter,
    )

In [19]:
#Returns Complicated Results
#(counts every mother>daughter pair of clause types, using the full typ labels)

results = []

for n in nodes:
    if F.otype.v(n) == 'clause_atom' and code_filter(F.code.v(n)):
        # the first node returned for the 'mother' edge supplies the mother's typ
        mother_node = list(C.mother.v(n))[0]
        results.append([F.typ.v(mother_node), F.typ.v(n)])

msg(str(len(results)))

# every collected pair is counted exactly once, so the total is simply the length
total = len(results)

pair_counts = collections.OrderedDict()
for mother_typ, daughter_typ in results:
    pair = '{}>{}'.format(mother_typ, daughter_typ)
    pair_counts[pair] = pair_counts.get(pair, 0) + 1

# most frequent pair first; ties keep their order of first appearance
mother_daughter = collections.OrderedDict(sorted(pair_counts.items(), key=lambda t: t[1],reverse = True))

# number of distinct pairs whose mother label contains 'Q'
# (kept because it is a notebook-level name; the visible cells do not read it)
q_total = sum(1 for pair in mother_daughter if 'Q' in pair.split('>')[0])

for item in mother_daughter:
    print (item, mother_daughter[item],get_percent(total,mother_daughter[item]))

print (len(mother_daughter))


    26s 389


NmCl>NmCl 62 15.94%
AjCl>AjCl 14 3.6%
NmCl>Ptcp 14 3.6%
AjCl>NmCl 11 2.83%
ZYq0>ZYq0 11 2.83%
ZIm0>Voct 11 2.83%
ZIm0>ZIm0 9 2.31%
Ptcp>Ptcp 8 2.06%
ZQt0>ZQt0 8 2.06%
ZIm0>WIm0 7 1.8%
AjCl>Voct 6 1.54%
ZQt0>Voct 6 1.54%
NmCl>ZYq0 4 1.03%
AjCl>ZIm0 4 1.03%
XQtl>ZQt0 4 1.03%
XQtl>NmCl 4 1.03%
XQtl>XQtl 4 1.03%
ZQt0>WxQ0 4 1.03%
Ptcp>NmCl 4 1.03%
NmCl>Voct 4 1.03%
ZIm0>xYq0 3 0.77%
CPen>NmCl 3 0.77%
NmCl>ZQt0 3 0.77%
ZIm0>NmCl 3 0.77%
xYq0>WxY0 3 0.77%
XQtl>WXQt 3 0.77%
NmCl>ZIm0 3 0.77%
xQt0>ZQt0 3 0.77%
ZQt0>ZQtX 3 0.77%
ZQtX>ZQt0 3 0.77%
xQtX>xQt0 3 0.77%
NmCl>AjCl 3 0.77%
xQtX>Voct 3 0.77%
ZQt0>xYq0 3 0.77%
NmCl>XQtl 3 0.77%
ZYq0>WYq0 2 0.51%
AjCl>xYq0 2 0.51%
ZQt0>Ptcp 2 0.51%
XQtl>Defc 2 0.51%
xYq0>xYq0 2 0.51%
ZQtX>Ellp 2 0.51%
NmCl>xYq0 2 0.51%
XQtl>AjCl 2 0.51%
NmCl>CPen 2 0.51%
NmCl>xQt0 2 0.51%
ZQt0>NmCl 2 0.51%
ZQt0>ZIm0 2 0.51%
NmCl>WXYq 2 0.51%
NmCl>ZQtX 2 0.51%
Voct>ZIm0 2 0.51%
xYqX>WQtX 2 0.51%
Defc>Ellp 2 0.51%
xQt0>Ptcp 2 0.51%
Ptcp>AjCl 2 0.51%
Ellp>Ellp 2 0.51%
Ptcp>Z

In [20]:
#Returns complicated results in a simplified format

import csv

simplified_counts = collections.OrderedDict()

for x in results:
    M_D = cl_simplifier('{}>{}'.format(x[0],x[1]))
    simplified_counts[M_D] = simplified_counts.get(M_D, 0) + 1

# every pair in results is counted exactly once
total_clauses = len(results)

# most frequent pair first; ties keep their order of first appearance
mother_daughter = collections.OrderedDict(sorted(simplified_counts.items(), key=lambda t: t[1],reverse = True))

# set to True for the complete, ungrouped list
show_complete_list = False

if show_complete_list:
    for item in mother_daughter:
        # percentages are relative to this cell's own total_clauses
        print (item,mother_daughter[item],get_percent(total_clauses,mother_daughter[item]))


types = ['NmCl','qtl','yqtl','impv','Ptcp','Ellp','Defc','Voct','way','CPen']

#writes a mother (rows) x daughter (columns) table of counts
# newline='' lets the csv module control line endings itself (avoids blank
# rows between records on platforms that translate '\n')
with open('Song_cl_rela.csv','w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow([' '] + types)

    for typ in types:
        occurrences = [mother_daughter.get('{}>{}'.format(typ,typ2), 0) for typ2 in types]
        writer.writerow([typ] + occurrences)

#arranges results grouped by mother typ

for typ in types:
    group_total = 0
    for item in mother_daughter:
        if item.split('>')[0] == typ:
            print ('{:11}{:11}     {}'.format(item,mother_daughter[item],get_percent(total_clauses,mother_daughter[item])))
            group_total += mother_daughter[item]
    print ('\n {:11}{:11}     {}\n\n'.format('',group_total,get_percent(total_clauses,group_total)))

NmCl>NmCl           90     23.14%
NmCl>qtl            16     4.11%
NmCl>Ptcp           16     4.11%
NmCl>yqtl           13     3.34%
NmCl>Voct           10     2.57%
NmCl>impv            7     1.8%
NmCl>CPen            2     0.51%
NmCl>Ellp            1     0.26%

                    155     39.85%


qtl>qtl             55     14.14%
qtl>NmCl            14     3.6%
qtl>Voct            12     3.08%
qtl>yqtl            11     2.83%
qtl>Ellp             7     1.8%
qtl>impv             5     1.29%
qtl>Ptcp             4     1.03%
qtl>Defc             3     0.77%
qtl>way              1     0.26%

                    112     28.79%


yqtl>yqtl           26     6.68%
yqtl>qtl             7     1.8%
yqtl>NmCl            3     0.77%
yqtl>Voct            2     0.51%
yqtl>Ellp            2     0.51%
yqtl>impv            1     0.26%
yqtl>Ptcp            1     0.26%

                     42     10.8%


impv>impv           18     4.63%
impv>Voct           13     3.34%
impv>yqtl            7     1.8%

# Search with Simplified Results

In [21]:
def code_translator(code):
    """Translate a three-digit relation code into a 'mother > daughter' label.

    The second digit of the code is looked up as the daughter's clause type
    and the third digit as the mother's.

    Parameters
    ----------
    code : str or int
        The relation code. It is converted with ``str()`` first, so the
        integer form that ``code_filter`` accepts works here as well.

    Returns
    -------
    str
        For example ``'way > qtl'`` for the code ``'127'``.

    Raises
    ------
    KeyError
        If the second or third digit has no entry in the lookup table.
    IndexError
        If the code has fewer than three characters.
    """
    verbs = {'0':'nmcl','1':'yqtl','2':'qtl','3':'impv', '4':'infc', '5':'infa','6':'ptc','7':'way','9':"Q"}
    digits = str(code)
    daughter = verbs[digits[1]]
    mother = verbs[digits[2]]
    return ('{} > {}'.format(mother,daughter))

In [22]:
#tallies the translated "mother > daughter" relation of every mainline clause atom

clauses = collections.OrderedDict([])
total_clauses = 0

for n in nodes:
    # only clause atoms carry the relation code examined here
    if F.otype.v(n) != 'clause_atom':
        continue

    raw_code = F.code.v(n)
    if not code_filter(raw_code):
        continue

    total_clauses += 1
    relation = code_translator(raw_code)
    clauses[relation] = clauses.get(relation, 0) + 1

msg('{} clause types found in {} clauses'.format(len(clauses),total_clauses))

    42s 27 clause types found in 389 clauses


In [23]:
#orders the relation counts (most frequent first) and prints them
#(nothing is written to disk here)

by_frequency = sorted(clauses.items(), key=lambda t: t[1],reverse = True)
sorted_clauses = collections.OrderedDict(by_frequency)

for relation, count in sorted_clauses.items():
    print ('{:11}{:11}     {}'.format(relation,count,get_percent(total_clauses,count)))


nmcl > nmcl        141     36.25%
qtl > qtl           43     11.05%
qtl > nmcl          35     9.0%
yqtl > nmcl         27     6.94%
impv > nmcl         20     5.14%
nmcl > qtl          17     4.37%
nmcl > ptc          17     4.37%
nmcl > yqtl         13     3.34%
qtl > yqtl          10     2.57%
yqtl > yqtl         10     2.57%
nmcl > impv          8     2.06%
impv > yqtl          7     1.8%
yqtl > qtl           7     1.8%
ptc > nmcl           6     1.54%
ptc > ptc            5     1.29%
qtl > ptc            4     1.03%
qtl > impv           4     1.03%
impv > qtl           3     0.77%
ptc > impv           3     0.77%
Q > Q                2     0.51%
yqtl > impv          1     0.26%
impv > ptc           1     0.26%
qtl > way            1     0.26%
nmcl > way           1     0.26%
infc > qtl           1     0.26%
yqtl > ptc           1     0.26%
ptc > qtl            1     0.26%


In [24]:
#prints the relations grouped by mother typ, each group followed by its subtotal

typs = ['nmcl','yqtl','qtl','impv','ptc','way']

for typ in typs:
    # relations whose mother (the part before ' > ') is the current typ
    group = [(relation, count) for relation, count in sorted_clauses.items()
             if relation.split(' > ')[0] == typ]

    for relation, count in group:
        print ('{:11}{:11}     {}'.format(relation,count,get_percent(total_clauses,count)))

    total = sum(count for _, count in group)
    print ('\n {:11}{:11}     {}\n\n'.format('',total,get_percent(total_clauses,total)))

nmcl > nmcl        141     36.25%
nmcl > qtl          17     4.37%
nmcl > ptc          17     4.37%
nmcl > yqtl         13     3.34%
nmcl > impv          8     2.06%
nmcl > way           1     0.26%

                    197     50.64%


yqtl > nmcl         27     6.94%
yqtl > yqtl         10     2.57%
yqtl > qtl           7     1.8%
yqtl > impv          1     0.26%
yqtl > ptc           1     0.26%

                     46     11.83%


qtl > qtl           43     11.05%
qtl > nmcl          35     9.0%
qtl > yqtl          10     2.57%
qtl > ptc            4     1.03%
qtl > impv           4     1.03%
qtl > way            1     0.26%

                     97     24.94%


impv > nmcl         20     5.14%
impv > yqtl          7     1.8%
impv > qtl           3     0.77%
impv > ptc           1     0.26%

                     31     7.97%


ptc > nmcl           6     1.54%
ptc > ptc            5     1.29%
ptc > impv           3     0.77%
ptc > qtl            1     0.26%

                     15 

In [25]:
#prints the relations grouped by daughter typ, each group followed by its subtotal

typs = ['nmcl','yqtl','qtl','impv','ptc','way']

for typ in typs:
    # relations whose daughter (the part after ' > ') is the current typ
    group = [(relation, count) for relation, count in sorted_clauses.items()
             if relation.split(' > ')[1] == typ]

    for relation, count in group:
        print ('{:11}{:11}     {}'.format(relation,count,get_percent(total_clauses,count)))

    total = sum(count for _, count in group)
    print ('\n {:11}{:11}     {}\n\n'.format('',total,get_percent(total_clauses,total)))

nmcl > nmcl        141     36.25%
qtl > nmcl          35     9.0%
yqtl > nmcl         27     6.94%
impv > nmcl         20     5.14%
ptc > nmcl           6     1.54%

                    229     58.87%


nmcl > yqtl         13     3.34%
qtl > yqtl          10     2.57%
yqtl > yqtl         10     2.57%
impv > yqtl          7     1.8%

                     40     10.28%


qtl > qtl           43     11.05%
nmcl > qtl          17     4.37%
yqtl > qtl           7     1.8%
impv > qtl           3     0.77%
infc > qtl           1     0.26%
ptc > qtl            1     0.26%

                     72     18.51%


nmcl > impv          8     2.06%
qtl > impv           4     1.03%
ptc > impv           3     0.77%
yqtl > impv          1     0.26%

                     16     4.11%


nmcl > ptc          17     4.37%
ptc > ptc            5     1.29%
qtl > ptc            4     1.03%
impv > ptc           1     0.26%
yqtl > ptc           1     0.26%

                     28     7.2%


qtl > way            1