In [None]:
import sys, os, collections
from tf.fabric import Fabric
# Where the Text-Fabric data lives locally, and which BHSA module/version
# inside that location to use.
DATABASE = '~/Programming/tf-github'
BHSA = 'bhsa/tf/2017'
TF = Fabric(
    locations=[DATABASE],
    modules=[BHSA],
    silent=False,
)

# Load only the features this notebook reads, then publish the API objects
# into the notebook namespace (presumably the source of the `F` and `T`
# names used by the cells below -- they are not defined anywhere else here).
api = TF.load('''
    vt lex domain
    book chapter verse
    sp freq_lex
    kind rela
''')
api.makeAvailableIn(globals())

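Set up the helper functions used by the later cells: `inScope` restricts the analysis to a fixed list of books, and `isPoetry` classifies a node as poetry or prose from its book, chapter and verse.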

In [2]:
import verbs
# Books included in the analysis; every other book is ignored.
scope = [
    "Genesis",
    "Exodus",
    "Leviticus",
    "Numbers",
    "Deuteronomy",
    "1_Samuel",
    "2_Samuel",
    "Psalms",
    "Proverbs",
]
def inScope(node):
    """Tell whether `node` lies in one of the books listed in `scope`.

    The node's section is looked up with `T.sectionFromNode`, which is
    expected to yield a (book, chapter, verse) triple; only the book name
    is used for the membership check.
    """
    book_name, _chapter, _verse = T.sectionFromNode(node)
    if book_name in scope:
        return True
    return False

def isPoetry(node):
    """Classify `node` as poetry (True) or prose (False) by its section.

    The section is looked up with `T.sectionFromNode` and must unpack into
    (book, chapter, verse). A node counts as poetry when it lies in:

    - Psalms or Proverbs (the whole book);
    - Genesis 49:2-27;
    - Exodus 15, verses below 19;
    - Numbers 21:15, 18, 27-30; 23:7-10, 18-23; 24:3-9, 15-24;
    - Deuteronomy 28:3-6, 16-19; 32, verses below 44; all of chapter 33.

    Everything else is prose. The result is always a real bool; previously
    the prose case fell off the end of the function and returned None.
    """
    book, chapter, verse = T.sectionFromNode(node)

    if book in ("Psalms", "Proverbs"):
        return True

    if book == "Genesis":
        return chapter == 49 and 1 < verse < 28

    if book == "Exodus":
        return chapter == 15 and verse < 19

    if book == "Numbers":
        if chapter == 21:
            return verse in (15, 18, 27, 28, 29, 30)
        if chapter == 23:
            return 6 < verse < 11 or 17 < verse < 24
        if chapter == 24:
            return 2 < verse < 10 or 14 < verse < 25
        return False

    if book == "Deuteronomy":
        if chapter == 28:
            return 2 < verse < 7 or 15 < verse < 20
        if chapter == 32:
            return verse < 44
        # Chapter 33 is poetry in full; no other chapter qualifies.
        return chapter == 33

    return False
    

# scopes = []
# for w in F.otype.s('word'):
#     book, chapter, verse = T.sectionFromNode(w)
#     if book in scopes:
#         continue
#     print(book)
#     scopes.append(book)

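Now we're going to compare prose and poetry at the word level: for every in-scope word we tally its part of speech, its verb tense (for verbs), and whether its lexeme is rare (lexeme frequency below 10).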

In [3]:
counter = 0

# One bucket per literary form. Part-of-speech counters ("<sp>_count") are
# added to a bucket the first time that part of speech is seen; verb-tense
# counters live in the nested "verb_tenses" dict.
attribute_tally = {
    literary_form: {"verb_tenses": {}, "words": 0, "rare": 0}
    for literary_form in ("poetry", "prose")
}

progress = 0
for progress, w in enumerate(F.otype.s('word'), start=1):
    # Heartbeat every 25000 words so a long run shows it is alive.
    if progress % 25000 == 0:
        print(progress)

    if not inScope(w):
        continue

    form = "poetry" if isPoetry(w) else "prose"
    bucket = attribute_tally[form]
    bucket["words"] += 1

    # Part-of-speech tally
    sp = F.sp.v(w)
    sp_key = sp + "_count"
    bucket[sp_key] = bucket.get(sp_key, 0) + 1

    # Verb-tense tally; words flagged by verbs.is_weqt get their own
    # "weqt" label instead of the value of the vt feature.
    if sp == "verb":
        if verbs.is_weqt(w):
            vt = "weqt"
        else:
            vt = F.vt.v(w)
        tense_key = vt + "_count"
        tense_counts = bucket["verb_tenses"]
        tense_counts[tense_key] = tense_counts.get(tense_key, 0) + 1

    # Rare-word tally: lexeme frequency below 10
    if F.freq_lex.v(w) < 10:
        bucket["rare"] += 1

print("done")

25000
50000
75000
100000
125000
150000
175000
200000
225000
250000
275000
300000
325000
350000
375000
400000
425000
done


In [4]:
# Create a nice table to compare
from IPython.display import HTML

import copy

# Work on a copy so attribute_tally keeps its nested layout, and lift each
# form's "verb_tenses" entries to top-level "vt_<tense>_count" keys.
# Each form is flattened from its OWN tenses, so a tense seen in only one of
# the two forms no longer raises KeyError.
flat_tally = copy.deepcopy(attribute_tally)
for side in ("poetry", "prose"):
    tense_counts = flat_tally[side].pop("verb_tenses")
    for tense_key, tense_total in tense_counts.items():
        flat_tally[side]["vt_" + tense_key] = tense_total

# Row order: every poetry key first (unchanged from before), followed by any
# key that only occurs in prose, which used to be left out of the table.
row_keys = list(flat_tally["poetry"])
row_keys += [key for key in flat_tally["prose"] if key not in flat_tally["poetry"]]

rows = ""
for x in row_keys:
    row_data = [x]
    for side in ("poetry", "prose"):
        # A category missing on one side counts as zero occurrences.
        count = flat_tally[side].get(x, 0)
        total = flat_tally[side]["words"]
        # Guard against an empty side so the table still renders.
        percent = round((count / total) * 100, 2) if total else 0.0
        row_data += [str(count), str(percent)]
    rows += "<tr><td>" + "</td><td>".join(row_data) + "</td></tr>\n"

HTML("""
<table>
    <thead>
        <tr><td rowspan=2></td><td colspan=2><b>Poetry</b></td><td colspan=2><b>Prose</b></td></tr>
        <tr><td>count</td><td>%</td><td>count</td><td>%</td></tr>
    </thead>
    <tbody>
""" + rows + """
    </tbody>
</table>
""")

Unnamed: 0_level_0,Poetry,Poetry,Prose,Prose
Unnamed: 0_level_1,count,%,count,%
words,36395,100.0,145301,100.0
rare,1902,5.23,4011,2.76
verb_count,8196,22.52,23907,16.45
conj_count,3925,10.78,22749,15.66
subs_count,12501,34.35,39744,27.35
nmpr_count,1513,4.16,11062,7.61
prep_count,5785,15.9,26122,17.98
prps_count,397,1.09,1768,1.22
art_count,1023,2.81,11662,8.03
nega_count,753,2.07,1789,1.23


Now we're going to compare prose and poetry at the clause level: for every in-scope clause we tally the values of its `kind` and `rela` features.

In [8]:
# One bucket per literary form. "kind_<value>_count" and "rela_<value>_count"
# counters are added to a bucket the first time that value is seen.
clause_tally = {
    literary_form: {"count": 0}
    for literary_form in ("poetry", "prose")
}

progress = 0
for progress, c in enumerate(F.otype.s('clause'), start=1):
    # Heartbeat every 25000 clauses.
    if progress % 25000 == 0:
        print(progress)

    if not inScope(c):
        continue

    form = "poetry" if isPoetry(c) else "prose"
    bucket = clause_tally[form]
    bucket["count"] += 1

    # Clause-kind tally
    kind = "kind_" + F.kind.v(c)
    kind_key = kind + "_count"
    bucket[kind_key] = bucket.get(kind_key, 0) + 1

    # Clause-relation tally
    rela = "rela_" + F.rela.v(c)
    rela_key = rela + "_count"
    bucket[rela_key] = bucket.get(rela_key, 0) + 1

25000
50000
75000


In [14]:
# Human-readable labels for the tally keys; keys without an entry are shown
# under their raw name.
english_rows = {
    "kind_VC_count": "Verbal Clause",
    "kind_WP_count": "Clause w/o Predication",
    "kind_NC_count": "Nominal Clause",
    "rela_ReVo_count": "Referral to the vocative",
    "rela_Coor_count": "Coordinated clause",
    "rela_Attr_count": "Attributive clause",
    "rela_Cmpl_count": "Complement clause",
    "rela_Resu_count": "Resumptive clause",
    "rela_Adju_count": "Adjunctive Clause",
    "rela_Objc_count": "Object clause",
    "rela_RgRc_count": "Regens/rectum connection",
    "rela_Subj_count": "Subject clause",
    "rela_PreC_count": "Predicative complement clause",
    "rela_Spec_count": "Specification clause",
}

# Row order: every poetry key first (unchanged from before), followed by any
# key that only occurs in prose, which used to be left out of the table.
row_keys = list(clause_tally["poetry"])
row_keys += [key for key in clause_tally["prose"] if key not in clause_tally["poetry"]]

rows = ""
for x in row_keys:
    row_data = [english_rows.get(x, x)]
    for side in ("poetry", "prose"):
        # A kind/relation missing on one side counts as zero occurrences
        # instead of raising KeyError.
        count = clause_tally[side].get(x, 0)
        total = clause_tally[side]["count"]
        # Guard against an empty side so the table still renders.
        percent = round((count / total) * 100, 1) if total else 0.0
        row_data += [str(count), str(percent)]
    rows += "<tr><td>" + "</td><td>".join(row_data) + "</td></tr>\n"

HTML("""
<table>
    <thead>
        <tr><td rowspan=2></td><td colspan=2><b>Poetry</b></td><td colspan=2><b>Prose</b></td></tr>
        <tr><td>count</td><td>%</td><td>count</td><td>%</td></tr>
    </thead>
    <tbody>
""" + rows + """
    </tbody>
</table>
""")

Unnamed: 0_level_0,Poetry,Poetry,Prose,Prose
Unnamed: 0_level_1,count,%,count,%
count,10288,100.0,27761,100.0
Verbal Clause,7329,71.2,22553,81.2
rela_NA_count,8146,79.2,20875,75.2
Clause w/o Predication,1184,11.5,1508,5.4
Nominal Clause,1775,17.3,3700,13.3
Referral to the vocative,113,1.1,21,0.1
Coordinated clause,810,7.9,775,2.8
Attributive clause,394,3.8,2292,8.3
Complement clause,32,0.3,89,0.3
Resumptive clause,149,1.4,486,1.8
