In [1]:
import pyperclip
from tf.app import use
A = use('etcbc/bhsa')
F = A.TF.api.F
L = A.TF.api.L

def get_lexeme_nodes(book, chapter=None, alphabetical=False):
    """Return the distinct lexeme nodes whose words occur in a book or chapter.

    Args:
        book: book name substituted into the search template (`book=<book>`).
        chapter: chapter number; when falsy, no chapter constraint is added
            and the whole book is searched.
        alphabetical: when true, order by the `lex_utf8` value of the
            matching word; otherwise order by word node (canonical order).

    Returns:
        A list of lexeme nodes without duplicates; each lexeme sits at the
        position of its first matching word in the chosen ordering.

    Side effect: prints a one-line summary of how many lexemes were found.
    """
    chapter_filter = "chapter=%d" % chapter if chapter else ""

    # Search for (lexeme, word) pairs where the word lies in the requested passage
    template = '''
l:lex
    w:word
    /with/
    book book=%s
        chapter %s
            w
    /-/
    ''' % (book, chapter_filter)
    pairs = A.search(template)

    if alphabetical:
        # Order by the `lex_utf8` feature of the word in each pair
        def sort_key(pair):
            return F.lex_utf8.v(pair[1])
    else:
        # Order by the word node itself (word nodes are in canonical order)
        def sort_key(pair):
            return pair[1]
    pairs.sort(key=sort_key)

    # Keep the lexeme of each pair; dict keys drop repeats but keep first-seen order
    lexemes = list(dict.fromkeys(lexeme for (lexeme, _word) in pairs))

    summary = "%d lexemes found in %s %s" % (len(lexemes), book, str(chapter or ""))
    print(summary)
    return lexemes

def table_lexemes(list, limit=None):
    """Show lexeme nodes via `A.table`, one node per row.

    Args:
        list: iterable of lexeme nodes (the name shadows the builtin but is
            kept so existing keyword callers continue to work).
        limit: maximum number of rows; any falsy value (None, 0) falls back
            to a cap of 100000 rows.
    """
    max_rows = limit if limit else 100000
    # A.table is given tuples, so wrap every node in a 1-tuple
    rows = [(node,) for node in list]
    A.table(rows[:max_rows])

def copy_lexemes(list, vertical=True):
    """Copy the `voc_lex_utf8` value of each lexeme node to the clipboard.

    Args:
        list: iterable of lexeme nodes (name shadows the builtin; kept for
            compatibility with existing callers).
        vertical: forwarded to `copy_list` to choose the separator.
    """
    forms = []
    for node in list:
        forms.append(F.voc_lex_utf8.v(node))
    copy_list(forms, vertical)

def copy_list(list, vertical=True):
    """Copy the items to the system clipboard as one delimited string.

    Args:
        list: iterable of items to export (name shadows the builtin; kept so
            existing keyword callers continue to work).
        vertical: when true, items are separated by newlines (one per line);
            otherwise by tabs (all on one line).
    """
    # separate items with newline or tab
    separator = "\n" if vertical else "\t"

    # str() every item so non-string values (e.g. numeric feature values)
    # do not raise TypeError in join; strings pass through unchanged.
    text = separator.join(str(item) for item in list)

    pyperclip.copy(text)

This is Text-Fabric 9.3.2
Api reference : https://annotation.github.io/text-fabric/tf/cheatsheet.html

122 features found and 0 ignored


# Exodus (select chapters)

In [2]:
# Exodus 1: collect the chapter's lexemes, then put them on the clipboard
ex1 = get_lexeme_nodes(book="Exodus", chapter=1)
copy_lexemes(ex1)

  0.51s 349 results
126 lexemes found in Exodus 1


In [3]:
# Exodus 19: collect the chapter's lexemes, then put them on the clipboard
ex19 = get_lexeme_nodes(book="Exodus", chapter=19)
copy_lexemes(ex19)

  0.18s 542 results
132 lexemes found in Exodus 19


In [4]:
# Exodus 20-21: gather each chapter separately, then merge
ex20 = get_lexeme_nodes(book="Exodus", chapter=20)
ex21 = get_lexeme_nodes(book="Exodus", chapter=21)
# Merge both lists; dict keys drop duplicates while keeping first-seen order
combined = list(dict.fromkeys([*ex20, *ex21]))

# For each list print its size and contents, with a blank line between lists
print(len(ex20), ex20, "", sep="\n")
print(len(ex21), ex21, "", sep="\n")
print(len(combined), combined, sep="\n")

copy_lexemes(combined)

  0.52s 420 results
139 lexemes found in Exodus 20
  0.52s 597 results
157 lexemes found in Exodus 21
139
[1437609, 1438048, 1437605, 1437606, 1437679, 1437607, 1438181, 1437712, 1437629, 1437621, 1437817, 1437714, 1437638, 1437657, 1437639, 1437610, 1438103, 1437987, 1438084, 1437719, 1437611, 1437934, 1437615, 1437616, 1437637, 1439738, 1439739, 1437602, 1437608, 1437995, 1437640, 1437620, 1438427, 1437722, 1437624, 1438328, 1439740, 1438536, 1437893, 1437789, 1437836, 1439414, 1439741, 1438652, 1438498, 1438530, 1438565, 1437766, 1438715, 1437894, 1437747, 1439742, 1438613, 1438022, 1437630, 1439699, 1437711, 1437951, 1437709, 1437708, 1438956, 1437820, 1437944, 1438534, 1437687, 1438369, 1438471, 1437648, 1437765, 1437641, 1437682, 1438458, 1437790, 1438236, 1438718, 1437690, 1437669, 1439743, 1439744, 1438852, 1438467, 1438184, 1438914, 1439488, 1437738, 1437783, 1438927, 1438242, 1438194, 1437623, 1437813, 1438380, 1439735, 1438017, 1439745, 1437891, 1438442, 1438568, 1437644, 14

# Refinements
## Exporting other features

It's often useful to export other features besides the Hebrew form; the frequency is particularly useful to have.

First we need a new copy function:

In [3]:
# rewrite the `copy_list` function to work with several lists in parallel
def copy_zipped_lists(lists, vertical=True):
    """Copy several parallel lists to the clipboard as tab-separated rows.

    Args:
        lists: sequence of lists (one per exported feature/column).
        vertical: when true, the lists are zipped so each input list becomes
            a column (zip stops at the shortest list); when false, each
            input list is written as one row.

    Every item is converted with str(); cells are joined by tabs and rows by
    newlines, with no trailing newline.
    """
    column_sep, row_sep = "\t", "\n"

    # vertical layout transposes the input: the i-th items form the i-th row
    rows = zip(*lists) if vertical else lists

    text = row_sep.join(
        column_sep.join(map(str, row)) for row in rows
    )

    pyperclip.copy(text)

# Small parallel sample lists used to try out copy_zipped_lists
lower_case = list("abcdef")
upper_case = list("ABCDEF")
numbers = list(range(1, 7))

In [6]:
# Test (vertical): each sample list becomes one column on the clipboard
copy_zipped_lists([lower_case, upper_case, numbers], vertical=True)

In [7]:
# Test (horizontal): each sample list becomes one row on the clipboard
copy_zipped_lists([lower_case, upper_case, numbers], vertical=False)

Next we'll upgrade the `copy_lexemes` function so that specific features can be selected for export:

In [4]:
def copy_lexemes(lexemes, features=["voc_lex_utf8"], vertical=True):
    """Copy the selected feature values of the lexeme nodes to the clipboard.

    Args:
        lexemes: lexeme nodes to export.
        features: names of Text-Fabric node features to export, one output
            list per name, in the given order.
        vertical: forwarded to `copy_zipped_lists` to choose the layout.

    NOTE(review): `features` has a mutable list default; that is only safe
    because this function never mutates it.
    """
    def column(feature_name):
        # see Text-Fabric Api for details
        # e.g., the getter for 'freq_lex' is `F.freq_lex.v`
        value_of = getattr(F, feature_name).v
        return [value_of(node) for node in lexemes]

    copy_zipped_lists([column(name) for name in features], vertical)

In [9]:
# Test: Genesis 1, exporting only the default feature (dictionary form)
lexemes = get_lexeme_nodes(book="Genesis", chapter=1)
copy_lexemes(lexemes)

  0.51s 673 results
104 lexemes found in Genesis 1


In [10]:
# Test: Genesis 1, three columns (with vowels, without vowels, frequency)
lexemes = get_lexeme_nodes(book="Genesis", chapter=1)
copy_lexemes(lexemes, features=["voc_lex_utf8", "lex_utf8", "freq_lex"])

  0.18s 673 results
104 lexemes found in Genesis 1


In [11]:
# Test: Genesis 1, same three features but laid out horizontally (one row per feature)
lexemes = get_lexeme_nodes(book="Genesis", chapter=1)
copy_lexemes(
    lexemes,
    features=["voc_lex_utf8", "lex_utf8", "freq_lex"],
    vertical=False,
)

  0.52s 673 results
104 lexemes found in Genesis 1


In [12]:
# Exodus 20-21 again, now exporting the frequency next to the Hebrew form
ex20 = get_lexeme_nodes(book="Exodus", chapter=20)
ex21 = get_lexeme_nodes(book="Exodus", chapter=21)
# Merge both lists; dict keys drop duplicates while keeping first-seen order
combined = list(dict.fromkeys([*ex20, *ex21]))
copy_lexemes(combined, features=["voc_lex_utf8", "freq_lex"])

  0.18s 420 results
139 lexemes found in Exodus 20
  0.53s 597 results
157 lexemes found in Exodus 21


In [13]:
# Tabulate the words of Exodus 20 that are verbs (sp=verb) and whose `prs_ps`
# feature is p1, p2 or p3. Each result row is a (chapter, clause, word) triple.
# NOTE(review): `prs_ps` is presumably the person of a pronominal suffix, so
# this would list verbs carrying a suffix; confirm against the BHSA feature docs.
A.table(A.search('''
chapter book=Exodus chapter=20
    clause    
        word sp=verb prs_ps=p1|p2|p3
'''))

  0.45s 8 results


n,p,chapter,clause,word
1,Exodus 20:2,Exodus 20,אֲשֶׁ֧ר הֹוצֵאתִ֛יךָ מֵאֶ֥רֶץ מִצְרַ֖יִם מִבֵּ֣֥ית עֲבָדִֽ֑ים׃,הֹוצֵאתִ֛יךָ
2,Exodus 20:5,Exodus 20,וְלֹ֣א תָעָבְדֵ֑ם֒,תָעָבְדֵ֑ם֒
3,Exodus 20:5,Exodus 20,עַל־שִׁלֵּשִׁ֥ים וְעַל־רִבֵּעִ֖ים לְשֹׂנְאָֽ֑י׃,שֹׂנְאָֽ֑י׃
4,Exodus 20:6,Exodus 20,וְעֹ֥֤שֶׂה חֶ֖֨סֶד֙ לַאֲלָפִ֑֔ים לְאֹהֲבַ֖י וּלְשֹׁמְרֵ֥י מִצְוֹתָֽי׃ ס,אֹהֲבַ֖י
5,Exodus 20:8,Exodus 20,לְקַדְּשֹֽׁ֗ו׃,קַדְּשֹֽׁ֗ו׃
6,Exodus 20:11,Exodus 20,וַֽיְקַדְּשֵֽׁהוּ׃ ס,יְקַדְּשֵֽׁהוּ׃ ס
7,Exodus 20:24,Exodus 20,וּבֵרַכְתִּֽיךָ׃,בֵרַכְתִּֽיךָ׃
8,Exodus 20:25,Exodus 20,וַתְּחַֽלְלֶֽהָ׃,תְּחַֽלְלֶֽהָ׃


In [14]:
# Test: Exodus 21, three columns (with vowels, without vowels, frequency)
lexemes = get_lexeme_nodes(book="Exodus", chapter=21)
copy_lexemes(lexemes, features=["voc_lex_utf8", "lex_utf8", "freq_lex"])

  0.18s 597 results
157 lexemes found in Exodus 21


In [15]:
# Tabulate the words of Exodus 21 that are verbs (sp=verb) and whose `prs_ps`
# feature is p1, p2 or p3. Each result row is a (chapter, clause, word) triple.
# NOTE(review): `prs_ps` is presumably the person of a pronominal suffix, so
# this would list verbs carrying a suffix; confirm against the BHSA feature docs.
A.table(A.search('''
chapter book=Exodus chapter=21
    clause    
        word sp=verb prs_ps=p1|p2|p3
'''))

  0.39s 19 results


n,p,chapter,clause,word
1,Exodus 21:6,Exodus 21,וְהִגִּישֹׁ֤ו אֲדֹנָיו֙ אֶל־הָ֣אֱלֹהִ֔ים,הִגִּישֹׁ֤ו
2,Exodus 21:6,Exodus 21,וְהִגִּישֹׁו֙ אֶל־הַדֶּ֔לֶת אֹ֖ו אֶל־הַמְּזוּזָ֑ה,הִגִּישֹׁו֙
3,Exodus 21:6,Exodus 21,וַעֲבָדֹ֖ו לְעֹלָֽם׃ ס,עֲבָדֹ֖ו
4,Exodus 21:8,Exodus 21,אֲשֶׁר־לֹ֥ו יְעָדָ֖הּ,יְעָדָ֖הּ
5,Exodus 21:8,Exodus 21,וְהֶפְדָּ֑הּ,הֶפְדָּ֑הּ
6,Exodus 21:8,Exodus 21,לְמָכְרָ֖הּ,מָכְרָ֖הּ
7,Exodus 21:8,Exodus 21,בְּבִגְדֹו־בָֽהּ׃,בִגְדֹו־
8,Exodus 21:9,Exodus 21,וְאִם־לִבְנֹ֖ו יִֽיעָדֶ֑נָּה,יִֽיעָדֶ֑נָּה
9,Exodus 21:14,Exodus 21,לְהָרְגֹ֣ו בְעָרְמָ֑ה,הָרְגֹ֣ו
10,Exodus 21:14,Exodus 21,מֵעִ֣ם מִזְבְּחִ֔י תִּקָּחֶ֖נּוּ,תִּקָּחֶ֖נּוּ
