In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import collections

from tf.app import use
from tf.client.make.build import Make

In [3]:
Mk = Make("bhsa", "structure", debugState="on")

In [4]:
Mk.loadTf()

In [5]:
# Short handles onto the API object that hangs off the Make instance,
# so the cells below can write F..., L..., T... directly.
A = Mk.A
api = A.api
Fs = api.Fs  # called below as Fs(name) with a feature name given as a string
F = api.F  # feature access by attribute, e.g. F.otype.s(...) and F.ps.v(...)
L = api.L  # used below as L.d(node, otype=...) to get contained nodes
T = api.T  # used below for T.sectionTypes

In [6]:
T.sectionTypes

['book', 'chapter', 'verse']

In [7]:
def orNull(x):
    """Normalise "no value" markers to the empty string.

    Returns "" when x is None or equals one of the strings "none", "NA"
    or "unknown"; every other value is returned unchanged.
    """
    if x is None or x == "none":
        return ""
    if x == "NA" or x == "unknown":
        return ""
    return x


def pers(x):
    """Reduce a person value to its last element.

    The value is first passed through orNull; if that yields "" the result
    is "", otherwise the last element (for a string: its last character)
    of the value is returned.
    """
    value = orNull(x)
    if value == "":
        return ""
    return value[-1]


def numb(x):
    """Reduce a number value to its first element.

    The value is first passed through orNull; if that yields "" the result
    is "", otherwise the first element (for a string: its first character)
    of the value is returned.
    """
    value = orNull(x)
    if value == "":
        return ""
    return value[0]

# Pseudo features

We combine some features into one, for the sake of economy of layers.

Pseudo features are just dicts keyed by nodes.

# Person, number, gender (and state)

In [148]:
# png: pseudo feature mapping each word node to the concatenation of its
# reduced person, reduced number, gender and state values.
# Words for which all four parts are empty get no entry.
png = {}

for w in F.otype.s("word"):
    combi = f"{pers(F.ps.v(w))}{numb(F.nu.v(w))}{orNull(F.gn.v(w))}{orNull(F.st.v(w))}"
    if not combi:
        continue
    png[w] = combi

# Frequency of every combined value, tallied from the finished mapping.
pngFreq = collections.Counter(png.values())

# Report the combined values in sorted order with their counts.
for val, n in sorted(pngFreq.items()):
    print(f"\t\t{val:<10} {n:>5} x")

		1p          1155 x
		1s          7183 x
		2pf           54 x
		2pm         3934 x
		2sf          835 x
		2sm         7458 x
		3p          4209 x
		3pf          378 x
		3pm         6721 x
		3sf         4298 x
		3sm        25294 x
		a           6138 x
		c           1363 x
		da           545 x
		dc           261 x
		dfa         1301 x
		dfc          526 x
		dfe            6 x
		dma          197 x
		dmc           22 x
		p            776 x
		pfa         5325 x
		pfc         1303 x
		pfe           23 x
		pma        22572 x
		pmc         8318 x
		pme          189 x
		s             10 x
		sa         19708 x
		sc          3138 x
		se            15 x
		sf           631 x
		sfa        17625 x
		sfc         4273 x
		sfe          141 x
		sm          1243 x
		sma        67646 x
		smc        20104 x
		sme          491 x


# Part-of-speech, verbal stem and tense

In [146]:
# For each of the three features, print its name followed by one line per
# value with the number of occurrences, as delivered by freqList().
for feat in ("vs", "vt", "pdp"):
    print(feat)
    # Fs(feat) looks the feature up by its name given as a string.
    for (val, n) in Fs(feat).freqList():
        # Counts wider than 5 digits overflow the column; alignment only.
        print(f"\t\t{val:<10} {n:>5} x")

vs
		NA         352874 x
		qal        50205 x
		hif         9407 x
		piel        6811 x
		nif         4145 x
		hit          960 x
		peal         654 x
		pual         492 x
		hof          427 x
		hsht         172 x
		haf          163 x
		pael          88 x
		htpe          53 x
		peil          40 x
		htpa          30 x
		shaf          15 x
		etpa           8 x
		hotp           8 x
		pasq           6 x
		poel           5 x
		tif            5 x
		afel           4 x
		etpe           3 x
		htpo           3 x
		nit            3 x
		poal           3 x
vt
		NA         352874 x
		perf       21128 x
		impf       16099 x
		wayq       14974 x
		ptca        8403 x
		infc        6607 x
		impv        4307 x
		ptcp        1298 x
		infa         894 x
pdp
		subs       125378 x
		prep       74771 x
		verb       69026 x
		conj       64555 x
		nmpr       33112 x
		art        29081 x
		nega        6789 x
		advb        6646 x
		adjv        5391 x
		prps        4508 x
		prde        3135 x
		intj        1886 x


# Clause-Phrase(atom) features

Clauses, phrases and their atoms all have features, some of which largely coincide
between an atom and its non-atom companion.

In [10]:
# For each feature, print a separate value-frequency table per node type,
# so that atoms can be compared with their non-atom companions.
for feat in ("txt", "typ", "rela", "function"):
    print(feat)
    for tp in ("clause", "clause_atom", "phrase", "phrase_atom"):
        print(f"\t{tp}")
        # nodeTypes={tp} is passed to limit the frequency list to this one node type.
        for (val, n) in Fs(feat).freqList(nodeTypes={tp}):
            print(f"\t\t{val:<10} {n:>5} x")

txt
	clause
		N          19866 x
		NQ         16509 x
		Q          16367 x
		NQQ         4435 x
		?N          3713 x
		?           3510 x
		?Q          3019 x
		?NQ         2401 x
		QQ          2315 x
		NQQQ        1723 x
		QN          1433 x
		D           1222 x
		NQN         1062 x
		?QQ          951 x
		??           778 x
		QNQ          714 x
		ND           532 x
		NQNQ         495 x
		??N          483 x
		?NQQ         454 x
		DQ           444 x
		?QQQ         321 x
		??Q          299 x
		?NQN         298 x
		NQQQN        283 x
		DNQ          273 x
		QND          267 x
		NQQQQ        259 x
		QQQ          257 x
		NQQN         250 x
		NQND         168 x
		?QN          165 x
		?NQQQ        148 x
		QQN          136 x
		??NQ         132 x
		NDN          132 x
		???          125 x
		NQQQQQ       112 x
		QNQQ         109 x
		DN           106 x
		?QQQQ        103 x
		?ND           97 x
		QQQQ          90 x
		?QNQ          89 x
		?NQNQ         85 x
		QNQQQ         64 x
		NDQ           57 x
	

# Research

To what extent do verse and chapter boundaries cut through sentence atoms, clause atoms and phrase atoms?

We'll find out.

In [11]:
def findCuts(cType, lType):
    """Find containers whose word range is not exactly spanned by their lType nodes.

    For every node of type cType, its first and last word are compared with
    the first word of the first lType node found inside it and the last word
    of the last lType node found inside it. A container is reported when
    either end differs, or when it has no lType nodes at all.

    Parameters
    ----------
    cType: string
        Node type of the containers to inspect.
    lType: string
        Node type of the nodes looked up inside each container.

    Returns
    -------
    list of tuple
        One 7-tuple per reported container:
        (container, its first word, its last word,
        first lType node, first word of that node,
        last lType node, last word of that node).
        The last four members are None when the container has no lType nodes.
    """
    mismatches = []

    for container in F.otype.s(cType):
        words = L.d(container, otype="word")
        firstWord, lastWord = words[0], words[-1]

        inner = L.d(container, otype=lType)

        if not inner:
            # Nothing of this type inside the container at all.
            record = (container, firstWord, lastWord, None, None, None, None)
        else:
            firstInner, lastInner = inner[0], inner[-1]
            innerStart = L.d(firstInner, otype="word")[0]
            innerEnd = L.d(lastInner, otype="word")[-1]

            if firstWord == innerStart and lastWord == innerEnd:
                # Both ends line up: no cut in this container.
                continue

            record = (
                container,
                firstWord,
                lastWord,
                firstInner,
                innerStart,
                lastInner,
                innerEnd,
            )

        mismatches.append(record)

    return mismatches

# cuts: "<container>-<atom type>" -> list of cut records returned by findCuts.
# Only combinations that actually have cuts get an entry.
cuts = {}

for container in ("verse", "chapter"):
    # Atom types are tried in the order sentence, clause, phrase.
    for ling in ("sentence", "clause", "phrase"):
        lingAtom = f"{ling}_atom"
        theseCuts = findCuts(container, lingAtom)
        print(f"{container:<8}-{lingAtom:<15} : {len(theseCuts):>5} cuts")
        if not theseCuts:
            # No cuts for this atom type: the remaining atom types are
            # not tried for this container.
            break
        cuts[f"{container}-{lingAtom}"] = theseCuts
    # Stop with the remaining containers when nothing has been recorded so far.
    # NOTE(review): cuts is cumulative over containers, so once an earlier
    # container has recorded cuts this check can no longer trigger - confirm
    # that this is the intended condition.
    if not cuts:
        break

verse   -sentence_atom   :  1970 cuts
verse   -clause_atom     :     0 cuts
chapter -sentence_atom   :     0 cuts


How are sentences structured in terms of sentence atoms?

In [12]:
# sa: number of sentence atoms inside a sentence -> number of sentences
# that have that many atoms.
sa = collections.Counter()

for s in F.otype.s("sentence"):
    # Count the sentence_atom nodes found inside this sentence.
    n = len(L.d(s, otype="sentence_atom"))
    sa[n] += 1
    
# Last expression of the cell: display the distribution.
sa

Counter({1: 63014, 2: 660, 3: 23, 6: 2, 4: 9, 7: 1, 23: 1, 5: 1})

# Run the record function

In [6]:
# do this if you have changed mkdata.py or config.yaml

Mk.config()

True

In [20]:
Mk.makeClientSettings()

Node type declared as result focus:

	sentence

Layers declared as visible in the result ('visible'):

	word/lex, word/phono, word/pdp, word/png, word/vs, word/vt, phrase/function, clause/rela, verse/number, chapter/number, book/book



In [21]:
Mk.makeLinks()

 2m 47s links for types book, chapter, verse
book                :     39 links
chapter             :    929 links
verse               :  23213 links
 2m 48s done


In [22]:
# Optional: uncomment the next line to call Mk.config() before recording.
# Mk.config()
Mk.record()

  0.00s preparing ... 
  0.00s start recording
 39 2_Chronicles                                                                    
    42s wrap recorders for delivery
    42s 	word
    42s 		lex
    43s 		phono
    43s 		gloss
    44s 		pdp
    44s 	phrase
    44s 		ptype
    45s 	clause
    45s 		ttype
    45s 		ctype
    45s 	sentence
    45s 		number
    45s 	verse
    45s 		number
    45s 	chapter
    45s 		number
    45s 	book
    45s 		book
    45s 		ref
    45s wrap accumulators for delivery
    45s 	word
    45s 		png
    45s 		vs
    45s 		vt
    45s 	phrase
    45s 		function
    45s 	clause
    45s 		rela
    45s 	sentence
    45s 	verse
    45s 	chapter
    45s 	book

    48s done


In [23]:
Mk.dumpCorpus()

  0.00s Dumping data to compact json files
  0.02s Data texts-word-lex stored in ~/github/annotation/app-bhsa/site/structure/corpus/texts-word-lex.js
  0.05s Data texts-word-phono stored in ~/github/annotation/app-bhsa/site/structure/corpus/texts-word-phono.js
  0.07s Data texts-word-gloss stored in ~/github/annotation/app-bhsa/site/structure/corpus/texts-word-gloss.js
  0.09s Data texts-word-pdp stored in ~/github/annotation/app-bhsa/site/structure/corpus/texts-word-pdp.js
  0.11s Data texts-word-png stored in ~/github/annotation/app-bhsa/site/structure/corpus/texts-word-png.js
  0.12s Data texts-word-vs stored in ~/github/annotation/app-bhsa/site/structure/corpus/texts-word-vs.js
  0.14s Data texts-word-vt stored in ~/github/annotation/app-bhsa/site/structure/corpus/texts-word-vt.js
  0.16s Data texts-phrase-ptype stored in ~/github/annotation/app-bhsa/site/structure/corpus/texts-phrase-ptype.js
  0.18s Data texts-phrase-function stored in ~/github/annotation/app-bhsa/site/structure/

In [24]:
Mk.dumpConfig()

  8.99s Config written to file ~/github/annotation/app-bhsa/site/structure/corpus/config.js


In [10]:
Mk.makeClient()
Mk.adjustDebug()

Copied static files
html file written to /Users/dirk/github/annotation/app-bhsa/site/index.html
html file written to /Users/dirk/github/annotation/app-bhsa/site/structure/index.html
html file (for use with file://) written to /Users/dirk/github/annotation/app-bhsa/site/structure/index-local.html
Adjusting debug in /Users/dirk/github/annotation/app-bhsa/site/structure/js/defs.js
Debug set to true
