# Hiphils with Dagesh in First Radical

First we import Text-Fabric and load the features we need

In [1]:
from tf.fabric import Fabric
# Location of the Text-Fabric data and the dataset to use inside it.
TF_LOCATIONS = '../text-fabric-data'
TF_MODULES = 'hebrew/etcbc4c'

# Names of the features handed to TF.load below.
TF_FEATURES = '''
	sp nu gn ps vt vs g_word_utf8 pfm g_pfm_utf8 lex_utf8 g_vbs_utf8
'''

TF = Fabric(locations=TF_LOCATIONS, modules=TF_MODULES)
api = TF.load(TF_FEATURES)
# Publish the API handles into the notebook namespace; the cells below
# use F, L and T without defining them.
api.makeAvailableIn(globals())

This is Text-Fabric 2.3.2
Api reference : https://github.com/ETCBC/text-fabric/wiki/Api
Tutorial      : https://github.com/ETCBC/text-fabric/blob/master/docs/tutorial.ipynb
Data sources  : https://github.com/ETCBC/text-fabric-data
Data docs     : https://etcbc.github.io/text-fabric-data
Shebanq docs  : https://shebanq.ancient-data.org/text
Slack team    : https://shebanq.slack.com/signup
Questions? Ask shebanq@ancient-data.org for an invite to Slack
114 features found and 0 ignored
  0.00s loading features ...
   |     0.18s B g_word_utf8          from /home/jcuenod/Programming/text-fabric-data/hebrew/etcbc4c
   |     0.16s B lex_utf8             from /home/jcuenod/Programming/text-fabric-data/hebrew/etcbc4c
   |     0.13s B sp                   from /home/jcuenod/Programming/text-fabric-data/hebrew/etcbc4c
   |     0.11s B nu                   from /home/jcuenod/Programming/text-fabric-data/hebrew/etcbc4c
   |     0.09s B gn                   from /home/jcuenod/Programming/text-fabric

Count the words that have both a preformative (`g_pfm_utf8`) and a verbal stem morpheme (`g_vbs_utf8`)

In [2]:
# Collect every word whose preformative and verbal-stem feature values are
# both something other than the empty string.
has_both = []
for w in F.otype.s('word'):
    preformative = F.g_pfm_utf8.v(w)
    verbal_stem = F.g_vbs_utf8.v(w)
    # Compare by value, not identity: `x is not ""` only behaves as intended
    # when the interpreter happens to share one empty-string object, and it
    # triggers a SyntaxWarning on Python 3.8+.
    if preformative != "" and verbal_stem != "":
        has_both.append({
            "node": w,
            "word": F.g_word_utf8.v(w),
            "stem": F.vs.v(w),
            "pfm": preformative,
            "vbs": verbal_stem
        })
print("Number of words with both vbs and pfm in OT:", len(has_both))

Number of words with both vbs and pfm in OT: 937


Find all the hiphils

In [3]:
import re
def remove_non_root_consonants(word, pfm, vbs):
    """Strip the preformative / verbal-stem consonants from the front of ``word``.

    The consonants in ``pfm + vbs`` are counted, and that many leading
    "consonant plus the accents/vowels that follow it" clusters are removed
    from ``word``. Stripping stops early as soon as what is left no longer
    starts with a consonant.

    Returns whatever remains of ``word``.
    """
    consonant = r'[\u05D0-\u05EA\uFB2A\uFB2B\uFB4B]'
    # One leading consonant together with any accent/vowelish characters after it
    leading_cluster = re.compile(r'^[\u05D0-\u05EA\uFB2A\uFB2B\uFB4B][\u0590-\u05CF]*')

    clusters_to_drop = len(re.findall(consonant, pfm + vbs))
    remainder = word
    for _ in range(clusters_to_drop):
        hit = leading_cluster.match(remainder)
        if hit is None:
            # Nothing more to strip; further attempts would not match either.
            break
        remainder = remainder[hit.end():]
    return remainder
 
# Build one record per hiphil word. "trim" is the surface form with the
# preformative / verbal-stem consonants stripped from its front.
words = []
for w in F.otype.s('word'):
    if F.vs.v(w) != "hif":
        continue
    surface = F.g_word_utf8.v(w)
    pfm_text = F.g_pfm_utf8.v(w)
    vbs_text = F.g_vbs_utf8.v(w)
    words.append(dict(
        node=w,
        word=surface,
        trim=remove_non_root_consonants(surface, pfm_text, vbs_text),
        vbs=vbs_text,
        pfm=pfm_text,
        lex=F.lex_utf8.v(w),
    ))
print("Number of words:", len(words))

Number of words: 9407


Now we pick out the hiphils that have a dagesh in their first root consonant

In [4]:
# Two consecutive consonants at the start of the string; group 1 captures
# everything (accents/vowels) that sits between them.
FIRST_TWO_CONSONANTS = re.compile(
    r'^[\u05D0-\u05EA\uFB2A\uFB2B\uFB4B]([\u0590-\u05CF]*)[\u05D0-\u05EA\uFB2A\uFB2B\uFB4B]'
)

has_dagesh = []
for w in words:
    pair = FIRST_TWO_CONSONANTS.match(w["trim"])
    # When there are not two consonants, inspect the whole trimmed form
    # instead (this happens not to change the result).
    marks = pair.group(1) if pair else w["trim"]

    # Keep the word if a dagesh (U+05BC) is among those characters.
    if re.search(r'\u05BC', marks):
        has_dagesh.append(w)

print("Has dagesh in 1st root consonant:", len(has_dagesh))


Has dagesh in 1st root consonant: 1606


Often the reason for the dagesh is an assimilated nun. So let's weed out those...

In [5]:
def compare_unicde(a, b):
    """Return True when ``a`` and ``b`` have identical UTF-8 encodings."""
    encoded_a = a.encode('UTF-8')
    encoded_b = b.encode('UTF-8')
    return encoded_a == encoded_b
# Split the dagesh forms in two: forms whose lexeme begins with nun while the
# trimmed form does not are only counted; every other form is kept.
has_dagesh_wo_nun = []
starts_with_nun = 0
for w in has_dagesh:
    lexeme_has_nun = compare_unicde(w["lex"][0], "נ")
    if lexeme_has_nun and not compare_unicde(w["trim"][0], "נ"):
        starts_with_nun += 1
        continue
    has_dagesh_wo_nun.append(w)

print("Root begins with nun:", starts_with_nun)
print("Everything else:", len(has_dagesh_wo_nun))

Root begins with nun: 1453
Everything else: 153


Now we just format things beautifully and create results

In [6]:
# For each remaining form print its reference, the word and its lexeme,
# followed by the text of the verse it occurs in.
for w in has_dagesh_wo_nun:
    node = w["node"]
    reference = str(T.sectionFromNode(node))
    print("{:<12} {}".format(reference, w["word"]), w["lex"])

    verse_node = L.u(node, otype="verse")[0]
    verse_words = L.d(verse_node, otype="word")
    print("\t", T.text(verse_words))

('Genesis', 2, 15) יַּנִּחֵ֣הוּ נוח
	 וַיִּקַּ֛ח יְהוָ֥ה אֱלֹהִ֖ים אֶת־הָֽאָדָ֑ם וַיַּנִּחֵ֣הוּ בְגַן־עֵ֔דֶן לְעָבְדָ֖הּ וּלְשָׁמְרָֽהּ׃ 
('Genesis', 19, 16) יַּנִּחֻ֖הוּ נוח
	 וַֽיִּתְמַהְמָ֓הּ׀ וַיַּחֲזִ֨קוּ הָאֲנָשִׁ֜ים בְּיָדֹ֣ו וּבְיַד־אִשְׁתֹּ֗ו וּבְיַד֙ שְׁתֵּ֣י בְנֹתָ֔יו בְּחֶמְלַ֥ת יְהוָ֖ה עָלָ֑יו וַיֹּצִאֻ֥הוּ וַיַּנִּחֻ֖הוּ מִח֥וּץ לָעִֽיר׃ 
('Genesis', 30, 38) יַּצֵּ֗ג יצג
	 וַיַּצֵּ֗ג אֶת־הַמַּקְלֹות֙ אֲשֶׁ֣ר פִּצֵּ֔ל בָּרֳהָטִ֖ים בְּשִֽׁקֲתֹ֣ות הַמָּ֑יִם אֲשֶׁר֩ תָּבֹ֨אןָ הַצֹּ֤אן לִשְׁתֹּות֙ לְנֹ֣כַח הַצֹּ֔אן וַיֵּחַ֖מְנָה בְּבֹאָ֥ן לִשְׁתֹּֽות׃ 
('Genesis', 33, 15) אַצִּֽיגָה יצג
	 וַיֹּ֣אמֶר עֵשָׂ֔ו אַצִּֽיגָה־נָּ֣א עִמְּךָ֔ מִן־הָעָ֖ם אֲשֶׁ֣ר אִתִּ֑י וַיֹּ֨אמֶר֙ לָ֣מָּה זֶּ֔ה אֶמְצָא־חֵ֖ן בְּעֵינֵ֥י אֲדֹנִֽי׃ 
('Genesis', 39, 16) תַּנַּ֥ח נוח
	 וַתַּנַּ֥ח בִּגְדֹ֖ו אֶצְלָ֑הּ עַד־בֹּ֥וא אֲדֹנָ֖יו אֶל־בֵּיתֹֽו׃ 
('Genesis', 42, 33) הַנִּ֣יחוּ נוח
	 וַיֹּ֣אמֶר אֵלֵ֗ינוּ הָאִישׁ֙ אֲדֹנֵ֣י הָאָ֔רֶץ בְּזֹ֣את אֵדַ֔ע כִּ֥י כֵנִ֖ים אַתֶּ֑ם אֲחִיכֶ֤ם הָֽאֶחָד֙ הַנִּ֣יחוּ אִתִּ֔