# Accent patterns

Request by Robert Voogdgeert.

Make a CSV of half verses in a representation that only shows accents and word boundaries.

In [1]:
import os
import re

from tf.app import use

In [2]:
# Load the BHSA corpus through its Text-Fabric app.
# hoist=globals() is passed so that the API names used further on
# (F, L, T, info, indent, silentOff) become available as notebook globals
# -- NOTE(review): inferred from their unqualified use below; confirm in the TF docs.
A = use('bhsa', hoist=globals(), silent='deep')
# Loading was done with silent='deep'; switch messages back on for later cells.
silentOff()

# Pattern from half verse

We define a function to get the accent pattern from a half verse.

Let's first inspect a few half verses (the first and second ones):

In [3]:
# Take the first two nodes of type half_verse as running examples.
(h1, h2) = F.otype.s('half_verse')[0:2]

# Show their text in the full transliteration format, in which the
# two-digit codes processed later in this notebook are visible.
for h in (h1, h2):
  print(T.text(h, fmt='text-trans-full'))

B.:-R;>CI73JT B.@R@74> >:ELOHI92JM 
>;71T HA-C.@MA73JIM W:->;71T H@->@75REY00 


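The function works by stripping all material that is not a digit or a space, then splitting the result on spaces into words,
then dividing the digits of each word into pairs (each pair is one accent code), and finally joining the accents within a word with `_` and the words with a space.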

We exclude some marks, because they are not proper cantillation accents.

In [7]:
# Two-digit codes that are skipped when building accent patterns,
# because they are not proper cantillation accents.
# NB: every element needs its own comma. Adjacent string literals without a
# comma are silently concatenated by Python ('95' '52' would become '9552'),
# which would leave both '95' and '52' out of the set.
excludedAccents = {
  '35', '45', '75', '95',  # meteg
  '52', '53',  # upper and lower dots
}

In [8]:
# Everything that is neither an ASCII digit nor a plain space is removed.
stripPat = re.compile(r'[^0-9 ]')
# Accent codes are read as consecutive pairs of digits.
accentPat = re.compile(r'[0-9]{2}')


def getAccents(h):
  """Return the accent pattern of node `h` as a single string.

  The text of `h` is fetched in the `text-trans-full` format and reduced
  to digits and spaces. Each remaining space-separated chunk of digits
  stands for one word and is cut into two-digit codes; codes listed in
  `excludedAccents` are dropped.

  Codes within a word are joined with `_`, words are joined with a space.
  Words without any digits disappear altogether; a word whose codes are
  all excluded contributes an empty item.
  """
  transcription = T.text(h, fmt='text-trans-full')
  digitWords = stripPat.sub('', transcription).split()
  return ' '.join(
    '_'.join(
      code for code in accentPat.findall(digits)
      if code not in excludedAccents
    )
    for digits in digitWords
  )

In [6]:
# Try the function on the two example half verses inspected earlier.
for h in (h1, h2):
  print(getAccents(h))

73 74 92
71 73 71 00


# All half verses

We do all half verses, but you may choose to do only selected books.

In [10]:
# None means: no restriction, every book is processed.
books = None

In [11]:
# Skip this cell if you want all books.
# Otherwise tweak it by specifying the set of books you want done (English book names).

books = {'Numbers', 'Ruth'}

In [13]:
# Collect one row per half verse: (book, chapter, verse, label, accent pattern).
indent(reset=True)
rows = []

for v in F.otype.s('verse'):
  (book, chapter, verse) = T.sectionFromNode(v)
  # Process the verse when there is no book filter or its book is selected.
  if books is None or book in books:
    for h in L.d(v, otype='half_verse'):
      label = F.label.v(h)
      accents = getAccents(h)
      rows.append((book, chapter, verse, label, accents))

info(f'{len(rows)} half verses done')

  0.55s 2603 half verses done


In [14]:
# Display the first ten rows as a quick sanity check.
rows[0:10]

[('Numbers', 1, 1, 'A', '63 94 91 71 73 74 92'),
 ('Numbers', 1, 1, 'B', '04 63 61 74 81 91 71 73 00'),
 ('Numbers', 1, 2, 'A', '81 03 74 80 73 74 92'),
 ('Numbers', 1, 2, 'B', '74 80 73 00'),
 ('Numbers', 1, 3, 'A', '63 70 03 80 71 73 92'),
 ('Numbers', 1, 3, 'B', '71 91 73 71 00'),
 ('Numbers', 1, 4, 'A', '74 80 71 73 92'),
 ('Numbers', 1, 4, 'B', '91 71 73 00'),
 ('Numbers', 1, 5, 'A', '33_03 74 80 71 73 92'),
 ('Numbers', 1, 5, 'B', '85 73 00')]

# Export results as CSV

The result will be written to your Downloads folder.

In [15]:
# Destination file in the user's Downloads folder.
csv = os.path.expanduser('~/Downloads/accentPatterns.csv')

# One line per row: the fields are converted with str() and separated by
# commas; print() supplies the terminating newline.
with open(csv, 'w') as fh:
  for row in rows:
    print(*row, sep=',', file=fh)