Add Tiberian Transcription Schema #45

charlesLoder · 2023-01-22T21:13:33Z

See discussion here

Will definitely need a test under test/schemas.

The text was updated successfully, but these errors were encountered:

johnlockejrr · 2023-01-23T11:55:43Z

I tried with tiberian schema (hebrew-transliteration/dist/schemas/tiberianKhan.js), still working on it, much to do:

"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.tiberianKhan = void 0;
const additionalFeatureTransliteration = require("../rules").additionalFeatureTransliteration;
exports.tiberianKhan = {
    VOCAL_SHEVA: "ǝ",
    HATAF_SEGOL: "ɛ",
    HATAF_PATAH: "a",
    HATAF_QAMATS: "o",
    HIRIQ: "i",
    TSERE: "e",
    SEGOL: "ɛ",
    PATAH: "a",
    QAMATS: "ɔ",
    HOLAM: "o",
    QUBUTS: "u",
    DAGESH: "",
    DAGESH_CHAZAQ: true,
    MAQAF: "-",
    PASEQ: "",
    SOF_PASUQ: "",
    QAMATS_QATAN: "ɔ",
    FURTIVE_PATAH: "a",
    HIRIQ_YOD: "i:",
    TSERE_YOD: "e:",
    SEGOL_YOD: "ɛ:",
    SHUREQ: "u:",
    HOLAM_VAV: "o:",
    QAMATS_HE: "ɔ:",
    SEGOL_HE: "ɛ:",
    TSERE_HE: "e:",
    MS_SUFX: "ɔw",
    ALEF: "ʔ",
    BET: "v",
    BET_DAGESH: "b",
    GIMEL: "ʁ",
    GIMEL_DAGESH: "g",
    DALET: "ð",
    DALET_DAGESH: "d",
    HE: "h",
    VAV: "v",
    ZAYIN: "z",
    HET: "ħ",
    TET: "tˁ",
    YOD: "j",
    FINAL_KAF: "χ",
    KAF: "χ",
    KAF_DAGESH: "kʰ",
    LAMED: "l",
    FINAL_MEM: "m",
    MEM: "m",
    FINAL_NUN: "n",
    NUN: "n",
    SAMEKH: "s",
    AYIN: "ʕ",
    FINAL_PE: "f",
    PE: "f",
    PE_DAGESH: "pʰ",
    FINAL_TSADI: "sˁ",
    TSADI: "sˁ",
    QOF: "q̟",
    RESH: "ʀ̟",
    SHIN: "ʃ",
    SIN: "s",
    TAV: "θ",
    TAV_DAGESH: "tʰ",
    DIVINE_NAME: "yhwh",
    STRESS_MARKER: { location: "before-syllable", mark: "ˈ" },
    /*ADDITIONAL_FEATURES: [
            { FEATURE: "syllable", HEBREW: "[\u05D0]$", TRANSLITERATION: "" },
            //{ FEATURE: "syllable", HEBREW: "[\u05B4]$", TRANSLITERATION: "i:" },
            //{ FEATURE: "syllable", HEBREW: "[\u05B5]$", TRANSLITERATION: "e:" },
            //{ FEATURE: "syllable", HEBREW: "[\u05B6]$", TRANSLITERATION: "ɛ:" },
            //{ FEATURE: "syllable", HEBREW: "[\u05B7]$", TRANSLITERATION: "a:" },
            //{ FEATURE: "syllable", HEBREW: "[\u05B8]$", TRANSLITERATION: "ɔ:" },
            //{ FEATURE: "syllable", HEBREW: "[\u05B9]$", TRANSLITERATION: "o:" },
            //{ FEATURE: "syllable", HEBREW: "[\u05BB]$", TRANSLITERATION: "u:" },
            /////{ FEATURE: "cluster", HEBREW: "[\u05B1]", TRANSLITERATION: "ɛ" },
            { FEATURE: "word", HEBREW: "\u05B0\u05D0\u05B4", TRANSLITERATION: "iʔi" },
            { FEATURE: "word", HEBREW: "\u05B0\u05D0\u05B5", TRANSLITERATION: "eʔe" },
            //{ FEATURE: "word", HEBREW: "\u05B0\u05D0\u05B6", TRANSLITERATION: "ɛʔɛ" }, // !!! //
            //{ FEATURE: "word", HEBREW: "\u05B0\u05D0\u05B7", TRANSLITERATION: "aʔa" }, // !!! //
            { FEATURE: "word", HEBREW: "\u05B0\u05D0\u05B8", TRANSLITERATION: "ɔʔɔ" },
            { FEATURE: "word", HEBREW: "\u05B0\u05D0\u05B9", TRANSLITERATION: "oʔo" },
            { FEATURE: "word", HEBREW: "\u05B0\u05D0\u05BB", TRANSLITERATION: "uʔu" },
            { FEATURE: "word", HEBREW: "\u05B0\u05D4\u05B4", TRANSLITERATION: "ihi" },
            { FEATURE: "word", HEBREW: "\u05B0\u05D4\u05B5", TRANSLITERATION: "ehe" },
            { FEATURE: "word", HEBREW: "\u05B0\u05D4\u05B6", TRANSLITERATION: "ɛhɛ" },
            { FEATURE: "word", HEBREW: "\u05B0\u05D4\u05B7", TRANSLITERATION: "aha" },
            { FEATURE: "word", HEBREW: "\u05B0\u05D4\u05B8", TRANSLITERATION: "ɔhɔ" },
            { FEATURE: "word", HEBREW: "\u05B0\u05D4\u05B9", TRANSLITERATION: "oho" },
            { FEATURE: "word", HEBREW: "\u05B0\u05D4\u05BB", TRANSLITERATION: "uhu" },
            { FEATURE: "word", HEBREW: "\u05B0\u05D7\u05B4", TRANSLITERATION: "iħi" },
            { FEATURE: "word", HEBREW: "\u05B0\u05D7\u05B5", TRANSLITERATION: "eħe" },
            { FEATURE: "word", HEBREW: "\u05B0\u05D7\u05B6", TRANSLITERATION: "ɛħɛ" },
            { FEATURE: "word", HEBREW: "\u05B0\u05D7\u05B7", TRANSLITERATION: "aħa" },
            { FEATURE: "word", HEBREW: "\u05B0\u05D7\u05B8", TRANSLITERATION: "ɔħɔ" },
            { FEATURE: "word", HEBREW: "\u05B0\u05D7\u05B9", TRANSLITERATION: "oħo" },
            { FEATURE: "word", HEBREW: "\u05B0\u05D7\u05BB", TRANSLITERATION: "uħu" },
            { FEATURE: "word", HEBREW: "\u05B0\u05E2\u05B4", TRANSLITERATION: "iʕi" },
            { FEATURE: "word", HEBREW: "\u05B0\u05E2\u05B5", TRANSLITERATION: "eʕe" },
            { FEATURE: "word", HEBREW: "\u05B0\u05E2\u05B6", TRANSLITERATION: "ɛʕɛ" },
            { FEATURE: "word", HEBREW: "\u05B0\u05E2\u05B7", TRANSLITERATION: "aʕa" },
            { FEATURE: "word", HEBREW: "\u05B0\u05E2\u05B8", TRANSLITERATION: "ɔʕɔ" },
            { FEATURE: "word", HEBREW: "\u05B0\u05E2\u05B9", TRANSLITERATION: "oʕo" },
            { FEATURE: "word", HEBREW: "\u05B0\u05E2\u05BB", TRANSLITERATION: "uʕu" },
            { FEATURE: "word", HEBREW: "\u05B0\u05D9", TRANSLITERATION: "i:" }
    ],*/
    ADDITIONAL_FEATURES: [
      {
        FEATURE: "cluster",
        HEBREW: "\u05B0",
        TRANSLITERATION: (cluster, transliteration, schema) => {
          const shewa = new RegExp(transliteration, "u");
          const clusterText = cluster.text;
          /**
           * @type {Cluster}
           */
          const next = cluster.next;
          const gutturalYodVowel = /[אהחעי]([\u{05B1}-\u{05BB}\u{05C7}])/u;
          const match = next.text.match(gutturalYodVowel);
          if (shewa.test(clusterText) && match) {
            return additionalFeatureTransliteration(clusterText, shewa, match[1], schema);
          }
          return clusterText;
        }
      }
    ],
    longVowels: false,
    qametsQatan: false,
    sqnmlvy: true,
    wawShureq: false,
    article: true,
    allowNoNiqqud: false,
    strict: true
};

johnlockejrr · 2023-01-23T12:02:19Z

Sample for what we should accomplish:
Genesis 1:1-4

baʀ̟eːˈʃiːiθ bɔːˈʀ̟ɔː ʔɛloːˈhiːim ˈʔeːeθ haʃʃɔːˈmaːjim veˈʔeːeθ hɔːˈʔɔːʀ̟ɛsˁ
vɔhɔːˈʔɔːʀ̟ɛsˁ hɔːɔjˈθɔː ˈθoːhuː vɔːˈvoːhuː voˈħoːʃɛχ ʕal-pʰaˈneː θoˈhoːom vaˈʀ̟uːwaħ ʔɛloːˈhiːim maʀ̟aːˈħɛːfɛθ ʕal-pʰaˈneː hamˈmɔːjim
vaɟˈɟoːmɛʀ̟ ʔɛloːˈhiːim jiˈhiː ˈʔoːoʀ̟ ˌvaˑjhiː-ˈʔoːoʀ
vaɟˈɟaːaʀ̟ ʔɛloːˈhiːim ʔɛθ-hɔːˈʔoːoʀ̟ kʰiː-ˈtˁoːov vaɟɟavˈdeːel ʔɛloːˈhiːim beːen hɔːˈʔoːoʀ̟ wuˈveːen haːˈħoːʃɛχ

charlesLoder · 2023-01-26T22:57:15Z

@johnlockejrr

Thanks for all this!

In the branch with the new callback function for additional features, the callback gives access to the Word, Syllable, or Cluster objects and their newly added properties in v0.13.x.

Right now, I'm running into a bit of a wall. Calling something like syllable.vowelName could return something that matches a schema property. I was envisioning it being used like this:

{
  FEATURE: "syllable",
  HEBREW: "\u{05B0}",
  TRANSLITERATION: (syllable, hebrew, schema) => {
    const next = syllable.next;
    if(next && next.vowelName) {
      // renamed function below from additionalFeatureTransliteration
      return replaceAndTransliterate(syllable.text, new Regex(hebrew, "u"), schema[next.vowelName], schema);
    }
  }
}

The problem, however, is this schema[next.vowelName] which lacks type safety...

Not totally sure how to resolve other than merging these two packages into a monorepo or heavily refactoring the schema interface — probably the latter

johnlockejrr · 2023-01-27T12:40:35Z

Probably the latter I think too.

charlesLoder · 2023-01-28T06:58:15Z

Refactoring allows for something a little more elegant:

const heb = require("./dist/index");
const rules = require("./dist/rules");

// Example: give a sheva the quality of the vowel in the following syllable.
const result = heb.transliterate("בְּרֵאשִׁ֖ית וַיַּבְדֵּל", {
  ADDITIONAL_FEATURES: [
    {
      // matches any sheva in a syllable that is NOT preceded by a vowel character
      HEBREW: "(?<![\u{05B1}-\u{05BB}\u{05C7}].*)\u{05B0}",
      FEATURE: "syllable",
      TRANSLITERATION: function (syllable, _hebrew, schema) {
        const next = syllable.next;
        // Guard BEFORE touching `next.vowelName`: with no following syllable there is
        // nothing to assimilate to, so the text is returned as it is.
        if (!next) {
          return syllable.text;
        }

        // discrepancy here: in havarotjs SHEVA is simply the character
        // whereas transliteration is concerned with a specific sheva, a vocal sheva
        const nextVowel = next.vowelName === "SHEVA" ? "VOCAL_SHEVA" : next.vowelName;

        if (nextVowel) {
          // a vowel name with no (or an empty) schema entry falls back to the empty string
          const vowel = schema[nextVowel] || "";
          // replaceAndTransliterate is an internal helper function
          return rules.replaceAndTransliterate(syllable.text, new RegExp("\u{05B0}", "u"), vowel, schema);
        }

        return syllable.text;
      }
    }
  ]
});

// bērēʾšît wayyabdēl

Though the regex is a little more complicated, it ensures that the sheva being matched is likely a vocal one.

thinking out loud: the ADDITIONAL_FEATURES property was originally designed with orthographic features in mind. Perhaps an ADDITIONAL_RULES could be a possible future property where the rule could match on something simpler like
syl.vowelName === "SHEVA"

johnlockejrr · 2023-01-31T15:35:21Z

bērēʾšît wayyabdēl would be wrong because shewa is a short vowel and the b in the second word is spirantized to v; in the Tiberian transcription proposed by Khan we should have baʀ̟eːˈʃiːiθ waɟɟav'deːel or, if you prefer, something like barē'šît wayyav'dēl

charlesLoder · 2023-02-16T02:27:05Z

@johnlockejrr

Checkout this branch for tiberian.

If you could look through the tests, and let me know what is incorrect.

Feel free to push changes or just comment here

johnlockejrr · 2023-02-16T07:02:40Z

Ok. I'll do that

…

On Thu, 16 Feb 2023 at 03:27, Charles Loder ***@***.***> wrote: @johnlockejrr <https://github.com/johnlockejrr> Checkout this branch for tiberian <https://github.com/charlesLoder/hebrew-transliteration/tree/tiberian>. If you could look through the tests, and let me know what is incorrect. Feel free to push changes or just comment here — Reply to this email directly, view it on GitHub <#45 (comment)>, or unsubscribe <https://github.com/notifications/unsubscribe-auth/AD44GHVWAGUJDXVGCI6ZOU3WXWGAHANCNFSM6AAAAAAUDGEC2M> . You are receiving this because you were mentioned.Message ID: ***@***.***>

johnlockejrr · 2023-02-16T13:24:07Z

Quite close!!! Need some more little work but we are almost there:

hebrew-transliteration output:

bǝʀ̟eʔʃi:θ bɔˈʀ̟ɔʔ ʔɛloˈhi:m ˈʔeθ haʃʃˈmajim vǝˈʔeθ hɔʔɔʀ̟ɛsˁ
vǝhɔˈʔɔʀ̟ɛsˁ hɔjˈθɔ: ˈθohu: vɔˈvohu: vǝˈħoʃɛχ ʕal-pʰǝˈne: θǝˈho:m vǝʀ̟u:aħ ʔɛloˈhi:m mǝʀ̟aˈħɛfɛθ ʕal-pʰǝˈne: hammɔjim
vaˈjjoʔmɛʀ̟ ʔɛloˈhi:m jǝˈhi: ˈʔo:ʀ̟ vajǝhi:-ʔo:ʀ̟
vaˈjjaʀ̟ʔ ʔɛloˈhi:m ʔɛθ-hɔˈʔo:ʀ̟ kʰi:-ˈtˁo:v vajjavˈdel ʔɛloˈhi:m ˈbe:n hɔˈʔo:ʀ̟ u:ˈve:n haħoʃɛχ
vajjiq̟ˈʀ̟ɔʔ ʔɛloˈhi:m lɔˈʔo:ʀ̟ ˈjo:m vǝlaˈħoʃɛχ ˈq̟ɔʀ̟ɔʔ ˈlɔjlɔ: vajǝhi:-ˈʕɛʀ̟ɛv vajǝhi:-ˈvoq̟ɛʀ̟ ˈjo:m ʔɛħɔð

Geoffrey Khan:

baʀ̟eːˈʃiːiθ bɔːˈʀ̟ɔː ʔɛloːˈhiːim ˈʔeːeθ haʃʃɔːˈmaːjim veˈʔeːeθ hɔːˈʔɔːʀ̟ɛsˁ
vɔhɔːˈʔɔːʀ̟ɛsˁ hɔːɔjˈθɔː ˈθoːhuː vɔːˈvoːhuː voˈħoːʃɛχ ʕal-pʰaˈneː θoˈhoːom vaˈʀ̟uːwaħ ʔɛloːˈhiːim maʀ̟aːˈħɛːfɛθ ʕal-pʰaˈneː hamˈmɔːjim
vaɟˈɟoːmɛʀ̟ ʔɛloːˈhiːim jiˈhiː ˈʔoːoʀ̟ ˌvaˑjhiː-ˈʔoːoʀ
vaɟˈɟaːaʀ̟ ʔɛloːˈhiːim ʔɛθ-hɔːˈʔoːoʀ̟ kʰiː-ˈtˁoːov vaɟɟavˈdeːel ʔɛloːˈhiːim beːen hɔːˈʔoːoʀ̟ wuˈveːen haːˈħoːʃɛχ
vaɟɟiqˈʀ̟ɔː ʔɛloːˈhiːim lɔːˈʔoːoʀ̟ ˈjoːom valaːˈħoːʃɛχ ˈq̟ɔʀ̟ɔː ˈlɔːɔjlɔː ˌvaˑjhiː-ˈʕɛːʀ̟ɛv ˌvaˑjhiː-ˈvoːqɛ̟ʀ̟ ˈjoːom ʔɛːˈħɔːɔð

NOTES:

We should:

amend in the schema VOCAL_SHEVA: "ǝ" to VOCAL_SHEVA: "a" (my bad!)
YOD with DAGGESH is pronounced ɟɟ and not jj
in Tiberian Hebrew vocalization the vowels represent qualitative distinctions not quantitative, the vowels are long when:
(i) in a stressed syllable or
(ii) in an open unstressed syllable.
That's why vocal SHEVA even exists: it is a full vowel that can't really be accented or made long (it can't even form a syllable by
itself - NOTE: we can make syllables with it, but as a strict Tiberian rule we shouldn't).
E.g., if vocal shewa had not been invented, they would have written בְּרֵאשִׁית as בַּרֵאשִׁית, but with the rule of vowel
lengthening that would have given the reciter something like ba:ʀ̟eːˈʃiːiθ, with a long PATACH in an open syllable; with SHEVA we
have baʀ̟eːˈʃiːiθ.
in a closed accented syllable the vowel is extra long (iːi in ʔɛloːˈhiːim or eːe in ˈʔeːeθ); in Khan's words, "when a long vowel occurs in a closed syllable, an epenthetic vowel is inserted after the long vowel before the syllable final consonant", e.g. דָּבָר [dɔːˈvɔ:ɔʀ̟], שָׁמַר [ʃɔːˈmɑːɑʀ̟].
the epenthetic vowel in glide was pronounced like: רוּחַ [ˈʀ̟uːwaħ], שִׂיחַ [ˈsiːjaħ] etc.
quiescent ALEPH in bǝʀ̟eʔʃi:θ bɔˈʀ̟ɔʔ (and elsewhere) should be dropped.
last but not least, the rules of SHEVA; I quote Khan:

The shewa (שְׁוָא) sign (אְ) in the Tiberian vocalization system was read either as a vowel or as zero
When shewa was read as vocalic, its quality in the Tiberian tradition was by default the same as that of the pataḥ vowel sign, i.e., the maximally low vowel [a]
e.g. תְּכַסֶּה [tʰaχasˈsɛː] "you (ms) cover"
מְדַּבְּרִים [maðabbaˈʀ̟iːim] "speaking (mpl)"

In the Tiberian tradition, when vocalic shewa occurs before a guttural consonant or the letter yod, it was realized with a different quality through an assimilatory process
(i) before a guttural (אהחע) it was realized as a short vowel with the quality of the vowel on the guttural
e.g. בְּעֶרְכְּךָ [bɛʕɛʀ̟kʰaˈχɔː] "by your evaluation"
וְהָיָה [vɔhɔːˈjɔː] "and it became"
בְּאֵר [beˈʔeːeʀ̟] "well"
מְאוֹד [moˈʔoːoð] "very"
מְחִיר [miˈħiːiʀ̟] "price"
מְעוּכָה [muʕuːˈχɔː] "pressed"
(ii) before yod, it was realized as a short vowel with the quality of short ḥireq [i]
e.g. בְּיוֹם [biˈjoːom] "on the day"
לְיִשְׂרָאֵל [lijisrˁɔːˈʔeːel] "to Israel"
תְּדַמְּיוּן [tʰaðammiˈjuːun] "you liken (mpl)"

The shewa sign is combined with some of the basic vowel signs to form the so-called ḥaṭef signs
(i) ḥaṭef pataḥ (אֲ) [a]
(ii) ḥaṭef segol (אֱ) [ɛ]
(iii) ḥaṭef qameṣ (אֳ) [ɔ]
In such signs the vocalic reading of the shewa is made explicit and also its quality
The default pronunciation of vocalic shewa with the quality of [a] was equivalent to that of the ḥaṭef pataḥ sign (אֲ)
Both the vocalic shewa and the vowels expressed by ḥaṭef signs were short vowels that, in principle, had the same quantity as short vowels in closed unstressed syllables, which were represented in standard Tiberian vocalization by a simple vowel sign.

charlesLoder · 2023-02-17T04:57:13Z

Let me take these a little at a time.

amend in the schema VOCAL_SHEVA: "ǝ" to VOCAL_SHEVA: "a" (my bad!)

Ok, that one is easy.

YOD with DAGGESH is pronounced ɟɟ and not jj

I think I got this correct, see test

hebrew-transliteration/test/schemas/tiberian.test.ts

Line 71 in 5d8c053

    
                 ${"yod with dagesh"}                 | ${"וַיִּלָּפֵ֑ת"} | ${"vaɟɟillɔˈfeθ"}

quiescent ALEPH in bǝʀ̟eʔʃi:θ bɔˈʀ̟ɔʔ (and elsewhere) should be dropped.

That makes sense. See tests on the following lines, and let me know if they're correct at least in regards to the aleph:

hebrew-transliteration/test/schemas/tiberian.test.ts

Line 69 in 251b4ce

    
                 ${"dagesh chazaq - not BeGaDKePhaT"} | ${"מִנְּזָר֜"}    | ${"minnaˈzɔʀ̟"}

and

hebrew-transliteration/test/schemas/tiberian.test.ts

Line 103 in 251b4ce

${"holem vav"} | ${"ס֣וֹא"} | ${"ˈso:"}

The rest will take a little more time to get to.

johnlockejrr · 2023-02-17T07:42:51Z

Yes, you are correct!

Forgot about DIVINE_NAME: "yhwh", it was pronounced according to the vowels written:
יֱהוִה֙ [ʔɛloːˈhiːim]
יְהוָֹ֤ה [ʔaðo:ˈnɔ:j]

charlesLoder · 2023-03-04T12:44:12Z

the epenthetic vowel in glide was pronounced like: רוּחַ [ˈʀ̟uːwaħ], שִׂיחַ [ˈsiːjaħ] etc.

See test:

hebrew-transliteration/test/schemas/tiberian.test.ts

Line 56 in da40956

    
                 ${"furtive patach, chet preceded by vav "} | ${"ר֑וּחַ"}    | ${"ˈʀ̟u:waħ"}

Forgot about DIVINE_NAME: "yhwh", it was pronounced according to the vowels written:

That one is easy enough:

hebrew-transliteration/src/schemas/tiberian.ts

Lines 66 to 67 in da40956

    
           DIVINE_NAME: "ʔaðo:ˈnɔ:j", 
        
           DIVINE_NAME_ELOHIM: "ʔɛloːˈhiːim",

Still have to work on the long vowels and sheva.

Had a baby a few months ago, hence the stop-and-go work on this

johnlockejrr · 2023-03-04T19:42:30Z

Splendid! Now we are even closer. Good work @charlesLoder

Congratulations on the baby!

charlesLoder · 2023-03-04T20:04:46Z

Just realizing I forgot to add a test for שִׂיחַ [ˈsiːjaħ]

charlesLoder · 2023-03-04T20:34:26Z

Take a look at all these, and let me know if I'm missing something.

hebrew-transliteration/test/schemas/tiberian.test.ts

Lines 52 to 64 in 16480b5

    
             // Table-driven cases for the furtive patach: each row gives a description, a Hebrew input and the expected output.
             describe("furtive", () => { 
        
               test.each` 
        
                 description                                | hebrew           | transliteration 
        
                 ${"furtive patach, chet"}                  | ${"נֹ֖חַ"}       | ${"ˈnoaħ"} 
        
                 ${"furtive patach, chet preceded by vav "} | ${"ר֑וּחַ"}      | ${"ˈʀ̟u:waħ"} 
        
                 ${"furtive patach, ayin"}                  | ${"כִּשְׁמֹ֤עַ"} | ${"kʰiʃˈmoaʕ"} 
        
                 ${"furtive patach, ayin preceded by vav"}  | ${"רָקִ֖יעַ"}    | ${"ʀ̟ɔˈq̟i:jaʕ"} 
        
                 ${"furtive patach, he"}                    | ${"גָּבֹ֗הַּ"}   | ${"gɔˈvoah"} 
        
               `("$description", (inputs: Inputs) => { 
        
                 // The row's Hebrew is transliterated with `schema` and must equal the expected string exactly.
                 const { hebrew, transliteration } = inputs; 
        
                 expect(transliterate(hebrew, schema)).toBe(transliteration); 
        
               }); 
        
             });

What about a vav/yod before a he (not even sure if that happens)?

johnlockejrr · 2023-03-05T09:31:27Z

All seem right, besides the long vowels of course.
גָּבֹ֗הַּ gɔˈvo:ah כִּשְׁמֹ֤עַ kʰiʃˈmo:aʕ נֹ֖חַ ˈno:aħ

Summary:

SHEVA never long, never accented
any vowel long when accented even if the syllable is closed
any vowel long when in open syllable
NOTE:
A vowel in an unstressed closed syllable was, in principle, short. If, however, it was followed by a series of contiguous consonants of relatively weak articulation (e.g. אהעחינל ʾhʿḥynl), then the vowel was sometimes lengthened, even when not stressed. This occurred in certain prefixes of the verbs היה hyh ‘be’ and חיה ḥyh ‘live’, namely the ḥireq of prefixes before he or ḥet, e.g. יִהְיֶ֫ה [jiːhˈjɛː] ‘he will be’, and the pataḥ of the conjunctive prefix וַ wa- before yod, e.g. וַיְהִ֫י [vaːjˈhiː] ‘and it was’.
Such lengthening is occasionally found elsewhere and is marked by the gaʿya sign, e.g. הֲשָׁ֣מַֽע עָם֩ [haˈʃɔːmaːʕ ˈʕɔːm] ‘did any people hear?’ (Deut. 4.33), שְׁמַֽע־נָ֤א [ʃamaːʕ-ˈnɔː] ‘listen’ (1 Sam. 28.22). The intention of the lengthening of the unstressed vowel in such contexts was, it seems, to ensure that adjacent weak letters were not elided in the reading.
When a long vowel occurs in a closed syllable, an epenthetic vowel is inserted after the long vowel before the syllable final consonant
e.g. דָּבָר [dɔːˈvɔ:ɔʀ̟]
שָׁמַר [ʃɔːˈmɑːɑʀ̟]

What about a vav/yod before a he (not even sure if that happens)?
Not sure I'm following.

NOTE:

Many words carry a secondary stress in addition to the main stress (fortunately this is noted with the cantillation marks), e.g. הָ֣אָדָ֔ם [ˌhɔːʔɔːˈðɔːm] ‘the man’ (Gen. 2.19), נִֽתְחַכְּמָ֖ה [ˌniːθḥakkaˈmɔː] ‘let us deal wisely’ (Exod. 1.10).

charlesLoder · 2023-03-06T01:49:07Z

What about a vav/yod before a he (not even sure if that happens)?
Not sure I'm following.

The furtive patach tests have a vav or yod before a chet or ayin. I'm trying to think if there are any words with a furtive patach before a he (e.g. גָּבֹ֗הַּ), where the he is preceded by a vav or yod.

Many words carry a secondary stress in addition to the main stress (fortunately this is noted with the cantillation marks), e.g. הָ֣אָדָ֔ם [ˌhɔːʔɔːˈðɔːm] ‘the man’ (Gen. 2.19), נִֽתְחַכְּמָ֖ה [ˌniːθḥakkaˈmɔː] ‘let us deal wisely’ (Exod. 1.10).

This would be a feature to build out. I also really need to update the isAccented property on the Syllable object.

Will look at vowel length next

johnlockejrr · 2023-03-06T10:40:44Z

What about a vav/yod before a he (not even sure if that happens)?
It happens: מַגְבִּ֥יהַּ תַּגְבִּ֣יהַּ יַגִּ֥יהַּ יַגְבִּ֣יהַּ אֱלֹ֨והַּ
I will try to find with vav too, I think there are. EDIT: found in BHS only אֱלֹ֨והַּ.
Other patach furtives: מָנֹ֜וחַ לָשׂ֥וּחַ יֵשׁ֡וּעַ אֲבִישׁ֥וּעַ וּמַלְכִּישׁ֑וּעַ שְׁלִ֔יחַ רֵ֣יח

johnlockejrr · 2023-03-06T16:33:02Z

Also, don't forget about the SHEVA rules when you got time.

In the Tiberian tradition, when vocalic shewa occurs before a guttural consonant or the letter yod, it was realized with a different quality through an assimilatory process
(i) before a guttural (אהחע) it was realized as a short vowel with the quality of the vowel on the guttural
e.g. בְּעֶרְכְּךָ [bɛʕɛʀ̟kʰaˈχɔː] "by your evaluation"
וְהָיָה [vɔhɔːˈjɔː] "and it became"
בְּאֵר [beˈʔeːeʀ̟] "well"
מְאוֹד [moˈʔoːoð] "very"
מְחִיר [miˈħiːiʀ̟] "price"
מְעוּכָה [muʕuːˈχɔː] "pressed"
(ii) before yod, it was realized as a short vowel with the quality of short ḥireq [i]
e.g. בְּיוֹם [biˈjoːom] "on the day"
לְיִשְׂרָאֵל [lijisrˁɔːˈʔeːel] "to Israel"
תְּדַמְּיוּן [tʰaðammiˈjuːun] "you liken (mpl)"

charlesLoder · 2023-03-26T02:07:29Z

@johnlockejrr

Another round of work.

More furtive tests
Take a look at these. They should be correct in terms of being preceded by a vav or yod. The long vowels aren't correct in this commit
db97c62

Epenthetic vowel

When a long vowel occurs in a closed syllable, an epenthetic vowel is inserted after the long vowel before the syllable final consonant

These long vowels are going to be tricky....

See the updated tests here
37bbdc1

Could you comment on each line whether it is correct or not. A simple 👍 if it's correct, and if it's not correct, then comment with the correct value.

johnlockejrr · 2023-03-28T13:16:48Z

I have commented on not correct ones, I hope I didn't make any mistakes, I could ask Khan to correct but maybe a little later.

johnlockejrr · 2023-04-04T14:06:09Z

What's the latest branch with Tiberian Schema?

johnlockejrr · 2023-04-04T14:13:00Z

Tried on the latest. Genesis 1

baʀ̟eʃiːθ bɔˈʀ̟ɔ ʔɛloˈhiːm ˈʔeθ haʃʃˈmajim vaˈʔeθ hɔʔɔʀ̟ɛsˁ
vahɔˈʔɔʀ̟ɛsˁ hɔjˈθɔː ˈθoˈhuː vɔˈvohuː vaˈħoʃɛχ ʕal-pʰaˈneː θaˈhoːm vaˈʀ̟uːwaħ ʔɛloˈhiːm maʀ̟aˈħɛfɛθ ʕal-pʰaˈneː hammɔjim
vaˈɟɟo֥mɛʀ̟ ʔɛloˈhiːm jaˈhiː ˈʔoːʀ̟ vajahiː-ʔoːʀ̟
vaˈɟɟa֧ʀ̟ ʔɛloˈhiːm ʔɛθ-hɔˈʔoːʀ̟ kʰiː-ˈtˁoːv vaɟɟavˈdel ʔɛloˈhiːm ˈbeːn hɔˈʔoːʀ̟ uːˈveːn haħoʃɛχ
vaɟɟiq̟ˈʀ̟ɔ ʔɛloˈhiːm lɔˈʔoːʀ̟ ˈjoːm valaˈħoʃɛχ ˈq̟ɔʀ̟ɔ ˈlɔjlɔː vajahiː-ˈʕɛʀ̟ɛv vajahiː-ˈvoq̟ɛʀ̟ ˈjoːm ʔɛħɔð

Khan:

baʀ̟eːˈʃiːiθ bɔːˈʀ̟ɔː ʔɛloːˈhiːim ˈʔeːeθ haʃʃɔːˈmaːjim veˈʔeːeθ hɔːˈʔɔːʀ̟ɛsˁ
vɔhɔːˈʔɔːʀ̟ɛsˁ hɔːɔjˈθɔː ˈθoːhuː vɔːˈvoːhuː voˈħoːʃɛχ ʕal-pʰaˈneː θoˈhoːom vaˈʀ̟uːwaħ ʔɛloːˈhiːim maʀ̟aːˈħɛːfɛθ ʕal-pʰaˈneː hamˈmɔːjim
vaɟˈɟoːmɛʀ̟ ʔɛloːˈhiːim jiˈhiː ˈʔoːoʀ̟ ˌvaˑjhiː-ˈʔoːoʀ
vaɟˈɟaːaʀ̟ ʔɛloːˈhiːim ʔɛθ-hɔːˈʔoːoʀ̟ kʰiː-ˈtˁoːov vaɟɟavˈdeːel ʔɛloːˈhiːim beːen hɔːˈʔoːoʀ̟ wuˈveːen haːˈħoːʃɛχ
vaɟɟiqˈʀ̟ɔː ʔɛloːˈhiːim lɔːˈʔoːoʀ̟ ˈjoːom valaːˈħoːʃɛχ ˈq̟ɔʀ̟ɔː ˈlɔːɔjlɔː ˌvaˑjhiː-ˈʕɛːʀ̟ɛv ˌvaˑjhiː-ˈvo:q̟ɛʀ̟ ˈjoːom ʔɛːˈħɔːɔð

charlesLoder · 2023-04-04T16:41:35Z

@johnlockejrr

Just updated the branch.

I'm struggling a bit with the vowel length stuff.

The most recent commit for Gen 1:1-5 produces:

baʀ̟eːˈʃiːijθ bɔːˈʀ̟ɔːɔ ʔɛːloːˈhiːijm ˈʔeːeθ haʃɔːˈmaːjim vaˈʔeːeθ hɔːʔɔːˈʀ̟ɛːɛsˁ
vahɔːˈʔɔːʀ̟ɛsˁ hɔjˈθɔːh ˈθoːˈhuː vɔːˈvoːhuː vaˈħoːʃɛχ ʕal-pʰaˈneːj θaˈhoːovm vaˈʀ̟uːwaħ ʔɛːloːˈhiːijm maʀ̟aːˈħɛːfɛθ ʕal-pʰaˈneːj hamɔːˈjiːim
vaˈɟɟoː֥mɛʀ̟ ʔɛːloːˈhiːijm jaˈhiːj ˈʔoːovʀ̟ vaːjahiːj-ˈʔoːovʀ̟
vaˈɟɟa֧ʀ̟ ʔɛːloːˈhiːijm ʔɛθ-hɔːˈʔoːovʀ̟ kʰiːj-ˈtˁoːovv vaɟɟavˈdeːel ʔɛːloːˈhiːijm ˈbeːejn hɔːˈʔoːovʀ̟ uːˈveːejn haːħoːˈʃɛχ
vaɟɟiq̟ˈʀ̟ɔːɔ ʔɛːloːˈhiːijm lɔːˈʔoːovʀ̟ ˈjoːovm valaːˈħoːʃɛχ ˈq̟ɔːʀ̟ɔ ˈlɔjlɔːh vaːjahiːj-ˈʕɛːʀ̟ɛv vaːjahiːj-ˈvoːq̟ɛʀ̟ ˈjoːovm ʔɛːˈħɔːɔð

In some ways it's closer; in other ways it's way off

johnlockejrr · 2023-04-04T18:44:53Z

Yes, way closer! We are on the right path :)

johnlockejrr · 2023-04-04T18:50:25Z

Same branch gave me this for Gen 1:1-5:

baʀ̟eːʃiːθ bɔːˈʀ̟ɔːɔ ʔɛːloːˈhiːijm ˈʔeːeθ haʃʃˈmaːjim vaˈʔeːeθ hɔːʔɔːʀ̟ɛsˁ
vahɔːˈʔɔːʀ̟ɛsˁ hɔjˈθɔːh ˈθoːˈhuː vɔːˈvoːhuː vaˈħoːʃɛχ ʕal-pʰaˈneːj θaˈhoːovm vaˈʀ̟uːwaħ ʔɛːloːˈhiːijm maʀ̟aːˈħɛːfɛθ ʕal-pʰaˈneːj hamɔːjim
vaˈɟɟoː֥mɛʀ̟ ʔɛːloːˈhiːijm jaˈhiːj ˈʔoːovʀ̟ vaːjahiːj-ʔoːʀ̟
vaˈɟɟa֧ʀ̟ ʔɛːloːˈhiːijm ʔɛθ-hɔːˈʔoːovʀ̟ kʰiːj-ˈtˁoːovv vaɟɟavˈdeːel ʔɛːloːˈhiːijm ˈbeːejn hɔːˈʔoːovʀ̟ uːˈveːejn haːħoːʃɛχ
vaɟɟiq̟ˈʀ̟ɔːɔ ʔɛːloːˈhiːijm lɔːˈʔoːovʀ̟ ˈjoːovm valaːˈħoːʃɛχ ˈq̟ɔːʀ̟ɔ ˈlɔjlɔːh vaːjahiːj-ˈʕɛːʀ̟ɛv vaːjahiːj-ˈvoːq̟ɛʀ̟ ˈjoːovm ʔɛːħɔð

One note (or two), the prolonged vowel appears only in accented closed syllable so not in hɔːʔɔːˈʀ̟ɛːɛsˁ that should be hɔː'ʔɔːʀ̟ɛsˁ, in bɔːˈʀ̟ɔːɔ we should have only bɔːˈʀ̟ɔː because Aleph is quiescent so it doesn't prolong the already long vowel.
We should also get rid of the Yod as mater e.g. uːˈveːejn that should be wuˈveːen or ʔɛːloːˈhiːijm that should be ʔɛloːˈhiːim etc.
Also the quality of the Sheva before gutturals and Yod: not vaˈħoːʃɛχ but voˈħoːʃɛχ, not vaˈʔeːeθ but veˈʔeːeθ etc.

charlesLoder · 2023-04-13T20:37:54Z

Ok, some more progress is being made, but now I'm hitting up against some deeper issues related to the syllabification package:

And some other issues I'm still trying to figure out.

I'm going to remove this from the v2.4.0 milestone so I can create another release and update the site.

Once I make more substantial changes to the syllabification package, I'll return to this.

It is, however, getting much closer! For Gen 1:1-5 I'm seeing a lot of the same issues occur, so much of it should be resolved soon.

I'm also working on a book project soon so that may take time away from this (too many irons in the fire! 🔥 )

charlesLoder · 2023-12-27T22:04:42Z

@johnlockejrr , nah, the holidays are kicking my butt

charlesLoder · 2023-12-29T16:40:19Z

If you haven't noticed, I work in starts and stops :)

This issue in the syllabication package has been the blocker. Had to dig a little deeper into the accents

Unblocked this.

Going forward, I'm only going to be using Hebrew text from Sefaria for testing as it has the best ta'amim. The text they use, Miqra `al pi ha-Mesorah, has accent helpers which allow for more accurate stress marker placement.

Obviously, I'm taking the happiest path and not dealing with edge cases, but I'd rather move forward.

Hopefully, I'll incorporate Sefaria's text into the web app one day.

Moving back to this repo when I get some time!

johnlockejrr · 2023-12-29T18:26:46Z

Glad to hear it! Yes, MAM text is pretty good, some typos nonetheless but is an open community and improving. The text is based on what is left from Aleppo Codex and other Firkovich fragments to fill the gaps. Mikraot Gedolot HaKeter (https://www.mgketer.org/) have a good text also online based on Aleppo Codex. If in the future you need any help with that let me know, I have it and other versions in SQL databases, csv etc.

…

On Fri, 29 Dec 2023 at 17:40, Charles Loder ***@***.***> wrote: If you haven't noticed, I work in starts and stops :) This issue <charlesLoder/havarotjs#147> in the syllabication package has been the blocker. Had to dig a little deeper into the accents Unblocked this. Going forward, I'm only going to be using Hebrew text from Sefaria for testing as it has the best ta'amim. The text the use, Miqra `al pi ha-Mesorah, has accent helpers which allow for more accurate stress marker placement. Obviously, I'm taking the happiest path and not dealing with edge cases, but I'd rather move forward. Hopefully, I'll incorporate Sefaria's text into the web app one day. Moving back to this repo when I get some time! — Reply to this email directly, view it on GitHub <#45 (comment)>, or unsubscribe <https://github.com/notifications/unsubscribe-auth/AD44GHUUD5MQME6FPP6XMELYL3W75AVCNFSM6AAAAAAUDGEC2OVHI2DSMVQWIX3LMV43OSLTON2WKQ3PNVWWK3TUHMYTQNZSGIYDSNZZGE> . You are receiving this because you were mentioned.Message ID: ***@***.***>

charlesLoder · 2024-01-01T04:22:20Z

The latest version fixes a few issues including alephs being doubled.

As for the resh, this package doesn't work with any lexical information.

Honestly, I didn't even know there were that many occurrences of a resh with a dagesh.

charlesLoder · 2024-01-17T03:34:38Z

@johnlockejrr when you get a chance, let me know if you see any issues with the latest version.

johnlockejrr · 2024-01-17T12:59:50Z

Sure, I'll get back to you

johnlockejrr · 2024-01-17T13:23:44Z

The issues I mentioned above are still there; in Obadiah, for example:
2 vaħaʁveː-sˈsɛːlaʕ should be vaħaʁveː-sˈsɛːlaʕ because the maqqef binds the words together, so they should be treated as one word, and the samekh has a dagesh forte. (בְחַגְוֵי־סֶּ֖לַע)
5 ʔim-ˈʃoːoððeː should be ʔim-ˈʃoːoðaðeː because the sheva under the first ð is vocal (אִם־שֹׁ֣ודְדֵי).

I'll do more tests.

johnlockejrr · 2024-01-17T13:56:45Z

Gen. 1:7 ʔɛθ-ˌhɔːʀ̟ɔːq̟iːˈjaʕ should be ʔɛθ-ˌhɔːʀ̟ɔːˈq̟iːjaʕ; a furtive patach is never accented. אֶת־הָֽרָקִיעַ֒
Note: I'm not sure why the SEGOLTA is sitting on the AYIN here, because the furtive patach can't be accented. MAM has 2 SEGOLTAs here, one above the QOF and one above the AYIN (אֶת־הָרָקִ֒יעַ֒); I think it is a typo, because I have never seen any manuscript with 2 SEGOLTAs here.

johnlockejrr · 2024-01-17T14:11:13Z

Comparison between Khan and our output.

Khan has some typos though... as an example he reads עֹֽשֶׂה־פְּרִ֛י two times without maqqef

charlesLoder · 2024-01-18T04:16:12Z

2 vaħaʁveː-sˈsɛːlaʕ should be vaħaʁveː-sˈsɛːlaʕ because the maqqef binds the words together that should be treated as one word and the samekh have dagesh forte. (בְחַגְוֵי־סֶּ֖לַע)

Ok, that was mentioned back here too. Let me take a look...

5 ʔim-ˈʃoːoððeː should be ʔim-ˈʃoːoðaðeː because the sheva under the first ð is vocal (אִם־שֹׁ֣ודְדֵי).

I'll have to research more, but I thought in Tiberian the sheva would still be silent.

Note: I'm not sure why here SEGOLTA is sitting on the AYN because the furtive patach can't get accentuated, MAM has here 2 SEGOLTA, one above the QOF and one above the AYN (אֶת־הָרָקִ֒יעַ֒), I think is a typo because I never seen any manuscript having 2 SEGOLTAS here.

The segolta accent is always postpositive, see this helpful article.

MAM adds accent helpers so when a ta'am falls on an unaccented syllable, another is added for clarity. I added some pretty extensive tests in the syllabification package for all this. These little helpers are one of the reasons I decided to use MAM as the primary text for testing

johnlockejrr · 2024-01-18T07:14:53Z

5 ʔim-ˈʃoːoððeː should be ʔim-ˈʃoːoðaðeː because the sheva under the first ð is vocal (אִם־שֹׁ֣ודְדֵי). In Hebrew you can't double a spirant and here that would be the output, and remember that the sheva between two identical consonants is almost always vocal, with some exceptions.

…

On Thu, 18 Jan 2024 at 05:16, Charles Loder ***@***.***> wrote: 2 vaħaʁveː-sˈsɛːlaʕ should be vaħaʁveː-sˈsɛːlaʕ because the maqqef binds the words together that should be treated as one word and the samekh have dagesh forte. (בְחַגְוֵי־סֶּ֖לַע) Ok, that was mentioned back here <#45 (comment)> too. Let me take a look... 5 ʔim-ˈʃoːoððeː should be ʔim-ˈʃoːoðaðeː beacuse the sheva under the first ð is vocal (אִם־שֹׁ֣ודְדֵי). I'll have to research more, but I thought in Tiberian the sheva would still be silent. Note: I'm not sure why here SEGOLTA is sitting on the AYN because the furtive patach can't get accentuated, MAM has here 2 SEGOLTA, one above the QOF and one above the AYN (אֶת־הָרָקִ֒יעַ֒), I think is a typo because I never seen any manuscript having 2 SEGOLTAS here. The segolta accent is always postpositive, see this helpful article <https://assets.cambridge.org/97811084/79936/excerpt/9781108479936_excerpt.pdf> . MAM adds accent helpers so when a ta'am falls on an unaccented syllable, another is added for clarity. I added some pretty extensive tests <https://github.com/charlesLoder/havarotjs/blob/main/test/syllable.isAccented.test.ts> in the syllabification package for all this. These little helpers are one of the reasons I decided to use MAM as the primary text for testing — Reply to this email directly, view it on GitHub <#45 (comment)>, or unsubscribe <https://github.com/notifications/unsubscribe-auth/AD44GHUO5KM4QYHMIPD52JTYPCOZPAVCNFSM6AAAAAAUDGEC2OVHI2DSMVQWIX3LMV43OSLTON2WKQ3PNVWWK3TUHMYTQOJXG43DINZYGU> . You are receiving this because you were mentioned.Message ID: ***@***.***>

johnlockejrr · 2024-01-18T13:26:27Z

Thanks for the heads-up about MAM. I'm not really sure that what they did is very Masoretic :) I mean, it breaks all the rules of the Masora. I think I'll move to something more reliable like Mikraot Gedolot HaKeter, which, like MAM, is based on what's left of the Aleppo Codex and other old manuscripts of the same family (Bar-Asher).

johnlockejrr · 2024-01-18T13:58:56Z

About the sheva na in Obadiah 5: the Mikraot Gedolot of mg.alhatorah.org says it is a sheva na. In the text source:
<span data-lexicon="7703">שׁ֣וֹ<span class="sheva-na">דְ</span>דֵי</span>

charlesLoder · 2024-01-19T04:10:59Z

5 ʔim-ˈʃoːoððeː should be ʔim-ˈʃoːoðaðeː because the sheva under the first
ð is vocal (אִם־שֹׁ֣ודְדֵי).

In Hebrew you can't double a spirant and here that would be the output, and
remember that the sheva between two identical consonants is almost always
vocal, with some exceptions.

Yup! I just wasn't sure if that was one of those rules that was taught in Hebrew classes that didn't correspond to actual Tiberian (e.g. like distinguishing between qamets qatan and qamets gadol, which Tiberian does not do).

But, in Khan I.2.5.7.3:

One notable case is a shewa under the first of a pair of identical consonants, which was vocalic if the preceding vowel was long,

So I've got to make some updates, but I think it will be simple.

johnlockejrr · 2024-01-19T06:54:43Z

Yes, you are right. The output overall is amazing and accurate, only some minor tweaks.

…

On Fri, 19 Jan 2024 at 05:11, Charles Loder ***@***.***> wrote: 5 ʔim-ˈʃoːoððeː should be ʔim-ˈʃoːoðaðeː beacuse the sheva under the first ð is vocal (אִם־שֹׁ֣ודְדֵי). In Hebrew you can't double a spirant and here that would be the output, and remember that the sheva between two identical consonants is almost always vocal, with some exceptions. Yup! I just wasn't sure if that was one of those rules that was taught in Hebrew classes that didn't correspond to actual Tiberian (e.g. like distinguishing between qamets qatan and qamets gadol, which Tiberian does not do). But, in Khan I.2.5.7.3: One notable case is a shewa under the first of a pair of identical consonants, which was vocalic if the preceding vowel was long, So I got to make some updates, but I think it will be simple — Reply to this email directly, view it on GitHub <#45 (comment)>, or unsubscribe <https://github.com/notifications/unsubscribe-auth/AD44GHQAH6ZCHYUFE5IAJ73YPHW55AVCNFSM6AAAAAAUDGEC2OVHI2DSMVQWIX3LMV43OSLTON2WKQ3PNVWWK3TUHMYTQOJZG4YDIOBTGY> . You are receiving this because you were mentioned.Message ID: ***@***.***>

charlesLoder · 2024-01-20T03:17:02Z

Ok! Got אִם־שֹׁ֣ודְדֵי as ʔim-ˈʃoːðaðeː and בְחַגְוֵי־סֶּ֖לַע as vaħaʁveː-sˈsɛːlaʕ 🎉

Just so we don't have to find all the links:

the Tiberian branch with latest at 0d8b5ed
the latest tiberian release on npm (v2.5.1-tiberian.10)
the web app with the latest tiberian version

Thanks for all the help testing btw! It feels like this is getting closer to done

johnlockejrr · 2024-01-20T06:29:52Z

Awesome, thanks for the update! I'll do more tests and get back to you. I'm really happy, very close indeed

…

On Sat, 20 Jan 2024 at 04:17, Charles Loder ***@***.***> wrote: Ok! Got אִם־שֹׁ֣ודְדֵי as ʔim-ˈʃoːðaðeː and בְחַגְוֵי־סֶּ֖לַע as vaħaʁveː-sˈsɛːlaʕ 🎉 Just so we don't have to find all the links: - the Tiberian branch <https://github.com/charlesLoder/hebrew-transliteration/tree/tiberian> with latest at 0d8b5ed <0d8b5ed> - the latest tiberian release on npm <https://www.npmjs.com/package/hebrew-transliteration/v/2.5.1-tiberian.10> (v2.5.1-tiberian.10) - the web app <https://deploy-preview-77--hebrewtransliteration.netlify.app/#> with the latest tiberian version Thanks for all the help testing btw! It feels like this is getting closer to done — Reply to this email directly, view it on GitHub <#45 (comment)>, or unsubscribe <https://github.com/notifications/unsubscribe-auth/AD44GHRI6HPJ5M6SONFATOLYPMZLTAVCNFSM6AAAAAAUDGEC2OVHI2DSMVQWIX3LMV43OSLTON2WKQ3PNVWWK3TUHMYTSMBRG4YTCMZZGI> . You are receiving this because you were mentioned.Message ID: ***@***.***>

charlesLoder · 2024-02-18T02:29:56Z

So testing the Genesis text has remained the same. Psalm 1, however, presents some more problems:

[
  {
    text: 'עַ֫ל־פַּלְגֵ֥י',
    expected: 'ˌʕaˑl-pʰalˈʁeː',
    received: 'ʕaːal-pʰalˈʁeː'
  },
  {
    text: 'אֲשֶׁ֣ר־יַעֲשֶׂ֣ה',
    expected: 'ʔaʃɛʀ̟-jaːʕaˈsɛː',
    received: 'ʔaʃɛːɛʀ̟-jaːʕaˈsɛː'
  },
  {
    text: 'יַצְלִֽיחַ׃',
    expected: 'jɑsˁˈliːjaħ',
    received: 'jɑsˁˈliːħaː'
  },
  {
    text: 'לֹֽא־כֵ֥ן',
    expected: 'loː-ˈχeːen',
    received: 'ˌloː-ˈχeːen'
  },
  {
    text: 'הָרְשָׁ֑עִים',
    expected: 'hɔːʀ̟aʃɔːˈʕiːim',
    received: 'hɔːɔʀ̟ˈʃɔːʕiːm'
  },
  {
    text: 'אֲשֶׁר־תִּדְּפֶ֗נּוּ',
    expected: 'ˌʔaˑʃɛʀ̟-tʰiddaˈfɛːɛnnuː',
    received: 'ʔaʃɛʀ̟-tʰiddaˈfɛːɛnnuː'
  },
  { text: 'רֽוּחַ׃', expected: 'ˈʀ̟uːwaħ', received: 'ˈʀ̟uːħaː' },
  {
    text: 'לֹֽא־יָקֻ֡מוּ',
    expected: 'loː-jɔːˈq̟uːmuː',
    received: 'ˌloː-jɔːˈq̟uːmuː'
  },
  {
    text: 'רְשָׁ֤עִים',
    expected: 'ʀ̟aʃɔːˈʕiːim',
    received: 'ʀ̟aˈʃɔːʕiːm'
  },
  {
    text: 'וְֽחַטָּאִ֥ים',
    expected: 'vaħɑttˁɔːˈʔiːim',
    received: 'vaħɑtˁtˁɔːˈʔiːim'
  },
  {
    text: 'צַ֝דִּיקִ֗ים',
    expected: 'sˁɑddiːˈq̟iːim',
    received: 'ˈsˁɑːɑddiːˈq̟iːim'
  },
  {
    text: 'כִּ֤י־יוֹדֵ֥עַ',
    expected: 'ˌkʰiː-joːˈðeːjaʕ',
    received: 'kʰiː-joːˈðeːaʕ'
  },
  {
    text: 'צַ֥דִּיקִים',
    expected: 'sˁɑddiːˈq̟iːim',
    received: 'ˈsˁɑːɑddiːq̟iːm'
  },
  { text: 'תֹּ֭אבֵד׃', expected: 'tʰoːˈveːeð', received: 'ˈtʰoːveð' }
]

A few of the issues are related to how the taamim characters function differently in poetry.

johnlockejrr · 2024-02-18T06:04:12Z

Job, Proverbs and Psalms are a different story, they have different taamim

charlesLoder · 2024-02-19T02:55:24Z

Yeah, the difficulty is that when the syllabification package encounters a mark, like a tipcha, it doesn't know if it is a tipcha or a dechi.

charlesLoder · 2024-02-20T01:28:28Z

I was wrong! Yay! The Tipcha and dechi are encoded with different characters. So I just need to come up with some better logic

johnlockejrr · 2024-02-20T07:30:13Z

Glad to hear it! Yes, they are identical in form but encoded differently in good Unicode fonts (not all).

…

On Tue, 20 Feb 2024 at 02:28, Charles Loder ***@***.***> wrote: I was wrong! Yay! The Tipcha and dechi are encoded with different characters. So I just need to come up with some better logic — Reply to this email directly, view it on GitHub <#45 (comment)>, or unsubscribe <https://github.com/notifications/unsubscribe-auth/AD44GHUJGSFEYKKIFOERUY3YUP34PAVCNFSM6AAAAAAUDGEC2OVHI2DSMVQWIX3LMV43OSLTON2WKQ3PNVWWK3TUHMYTSNJTGM2DOMRWGE> . You are receiving this because you were mentioned.Message ID: ***@***.***>

charlesLoder · 2024-03-10T02:59:10Z

Latest diffs of Psalm 1:

[
  {
    text: 'אַ֥שְֽׁרֵי-הָאִ֗ישׁ',
    expected: 'ˌʔaːˌʃaˑʀ̟eː-hɔːˈʔiːiʃ',
    received: 'ˈʔaːʃaʀ̟eː-hɔːˈʔiːiʃ'
  },
  {
    text: 'וּֽבְתוֹרָת֥וֹ',
    expected: 'ˌwuˑvθoːʀ̟ɔːˈθoː',
    received: 'wuvθoːʀ̟ɔːˈθoː'
  },
  { text: 'וְהָיָ֗ה', expected: 'ˌvɔˑhɔːˈjɔː', received: 'vɔhɔːˈjɔː' },
  {
    text: 'עַ֫ל־פַּלְגֵ֥י',
    expected: 'ˌʕaˑl-pʰalˈʁeː',
    received: 'ʕaːal-pʰalˈʁeː'
  },
  {
    text: 'אֲשֶׁ֣ר־יַעֲשֶׂ֣ה',
    expected: 'ʔaʃɛʀ̟-jaːʕaˈsɛː',
    received: 'ʔaʃɛːɛʀ̟-jaːʕaˈsɛː'
  },
  {
    text: 'לֹֽא־כֵ֥ן',
    expected: 'loː-ˈχeːen',
    received: 'ˌloː-ˈχeːen'
  },
  {
    text: 'הָרְשָׁ֑עִים',
    expected: 'hɔːʀ̟aʃɔːˈʕiːim',
    received: 'hɔːɔʀ̟ˈʃɔːʕiːm'
  },
  {
    text: 'אֲשֶׁר־תִּדְּפֶ֗נּוּ',
    expected: 'ˌʔaˑʃɛʀ̟-tʰiddaˈfɛːɛnnuː',
    received: 'ʔaʃɛʀ̟-tʰiddaˈfɛːɛnnuː'
  },
  {
    text: 'עַל־כֵּ֤ן ׀',
    expected: 'ʕal-ˈkʰeːen',
    received: 'ʕal-ˈkʰeːen '
  },
  {
    text: 'כִּ֤י־יוֹדֵ֥עַ',
    expected: 'ˌkʰiː-joːˈðeːjaʕ',
    received: 'kʰiː-joːˈðeːaʕ'
  },
  {
    text: 'צַ֥דִּיקִים',
    expected: 'sˁɑddiːˈq̟iːim',
    received: 'ˈsˁɑːɑddiːq̟iːm'
  }
]

A lot of the issues involve secondary stress arising from the minor gaya, which is really difficult for the syllabification package to figure out.

johnlockejrr · 2024-03-10T08:32:26Z

Seems like it

charlesLoder · 2024-03-12T03:11:30Z

Alright!

I made some updates and adjusted the text I was using to compare Psa 1; now I get this:

[
  {
    text: 'אַ֥שְֽׁרֵי-הָאִ֗ישׁ',
    expected: 'ˌʔaːˌʃaˑʀ̟eː-hɔːˈʔiːiʃ',
    received: 'ˈʔaːˌʃaˑʀ̟eː-hɔːˈʔiːiʃ'
  },
  {
    text: 'הָרְשָׁעִ֑ים',
    expected: 'hɔːʀ̟aʃɔːˈʕiːim',
    received: 'hɔːɔʀ̟ʃɔːˈʕiːim'
  },
  {
    text: 'אֲֽשֶׁר־תִּדְּפֶ֥נּוּ',
    expected: 'ˌʔaˑʃɛʀ̟-tʰiddaˈfɛːɛnnuː',
    received: 'ʔaʃɛʀ̟-tʰiddaˈfɛːɛnnuː'
  },
  {
    text: 'כִּֽי־יוֹדֵ֣עַ',
    expected: 'ˌkʰiː-joːˈðeːjaʕ',
    received: 'ˌkʰiˑ-joːˈðeːaʕ'
  }
]

At this point, getting any more accurate results really means digging even deeper into the weeds. I don't think I have it in me! :)

But, this honestly feels like it's in a good place. I may close it this week

johnlockejrr · 2024-03-12T14:37:07Z

For now you can publish it like this. Maybe later, in time, we can find something to tweak. I’m very happy with it anyway; it is one of a kind.

…

On Tue, 12 Mar 2024 at 04:11, Charles Loder ***@***.***> wrote: Alright! Made some updates and adjusted the text I was using to compare Psa 1, I get this: [ { text: 'אַ֥שְֽׁרֵי-הָאִ֗ישׁ', expected: 'ˌʔaːˌʃaˑʀ̟eː-hɔːˈʔiːiʃ', received: 'ˈʔaːˌʃaˑʀ̟eː-hɔːˈʔiːiʃ' }, { text: 'הָרְשָׁעִ֑ים', expected: 'hɔːʀ̟aʃɔːˈʕiːim', received: 'hɔːɔʀ̟ʃɔːˈʕiːim' }, { text: 'אֲֽשֶׁר־תִּדְּפֶ֥נּוּ', expected: 'ˌʔaˑʃɛʀ̟-tʰiddaˈfɛːɛnnuː', received: 'ʔaʃɛʀ̟-tʰiddaˈfɛːɛnnuː' }, { text: 'כִּֽי־יוֹדֵ֣עַ', expected: 'ˌkʰiː-joːˈðeːjaʕ', received: 'ˌkʰiˑ-joːˈðeːaʕ' }] At which case, getting any more accurate results really means digging even deeper into the weeds. I don't think I have it in me! :) But, this honestly feels like it's in a good place. I may close it this week — Reply to this email directly, view it on GitHub <#45 (comment)>, or unsubscribe <https://github.com/notifications/unsubscribe-auth/AD44GHU2HNN3C46TRB4VZULYXZ6APAVCNFSM6AAAAAAUDGEC2OVHI2DSMVQWIX3LMV43OSLTON2WKQ3PNVWWK3TUHMYTSOBZHE3TCNJWGY> . You are receiving this because you were mentioned.Message ID: ***@***.***>

charlesLoder · 2024-03-13T01:13:23Z

CLOSED! 🎉

charlesLoder · 2024-03-13T01:14:45Z

If you shoot me an email (see my profile for my address), I can add you to the email updates and give credit

charlesLoder added this to the 2.4.0 milestone Jan 22, 2023

charlesLoder referenced this issue Mar 30, 2023

wip: tests for epenthetic vowel

37bbdc1

charlesLoder mentioned this issue Apr 4, 2023

Mid-word coda consonant without shva nah is silent (e.g. "יִשָּׂשכָר") #60

Closed

charlesLoder mentioned this issue Mar 13, 2024

Tiberian #77

Merged

charlesLoder closed this as completed in #77 Mar 13, 2024

Add Tiberian Transcription Schema #45

Add Tiberian Transcription Schema #45

Comments

charlesLoder commented Jan 22, 2023

johnlockejrr commented Jan 23, 2023

johnlockejrr commented Jan 23, 2023

charlesLoder commented Jan 26, 2023

johnlockejrr commented Jan 27, 2023

charlesLoder commented Jan 28, 2023

johnlockejrr commented Jan 31, 2023 • edited Loading

charlesLoder commented Feb 16, 2023

johnlockejrr commented Feb 16, 2023 via email • edited Loading

johnlockejrr commented Feb 16, 2023 • edited Loading

charlesLoder commented Feb 17, 2023

johnlockejrr commented Feb 17, 2023 • edited Loading

charlesLoder commented Mar 4, 2023

johnlockejrr commented Mar 4, 2023 • edited Loading

charlesLoder commented Mar 4, 2023

charlesLoder commented Mar 4, 2023

johnlockejrr commented Mar 5, 2023 • edited Loading

charlesLoder commented Mar 6, 2023

johnlockejrr commented Mar 6, 2023 • edited Loading

johnlockejrr commented Mar 6, 2023

charlesLoder commented Mar 26, 2023

johnlockejrr commented Mar 28, 2023

johnlockejrr commented Apr 4, 2023

johnlockejrr commented Apr 4, 2023

charlesLoder commented Apr 4, 2023

johnlockejrr commented Apr 4, 2023

johnlockejrr commented Apr 4, 2023 • edited Loading

charlesLoder commented Apr 13, 2023

charlesLoder commented Dec 27, 2023

charlesLoder commented Dec 29, 2023 • edited Loading

johnlockejrr commented Dec 29, 2023 via email • edited Loading

charlesLoder commented Jan 1, 2024

charlesLoder commented Jan 17, 2024

johnlockejrr commented Jan 17, 2024

johnlockejrr commented Jan 17, 2024

johnlockejrr commented Jan 17, 2024 • edited Loading

johnlockejrr commented Jan 17, 2024

charlesLoder commented Jan 18, 2024

johnlockejrr commented Jan 18, 2024 via email

johnlockejrr commented Jan 18, 2024 • edited Loading

johnlockejrr commented Jan 18, 2024 • edited Loading

charlesLoder commented Jan 19, 2024 • edited Loading

johnlockejrr commented Jan 19, 2024 via email

charlesLoder commented Jan 20, 2024

johnlockejrr commented Jan 20, 2024 via email

charlesLoder commented Feb 18, 2024

johnlockejrr commented Feb 18, 2024

charlesLoder commented Feb 19, 2024

charlesLoder commented Feb 20, 2024

johnlockejrr commented Feb 20, 2024 via email

charlesLoder commented Mar 10, 2024

johnlockejrr commented Mar 10, 2024

charlesLoder commented Mar 12, 2024

johnlockejrr commented Mar 12, 2024 via email

charlesLoder commented Mar 13, 2024

charlesLoder commented Mar 13, 2024

johnlockejrr commented Jan 31, 2023 •

edited

Loading

johnlockejrr commented Feb 16, 2023 via email •

edited

Loading

johnlockejrr commented Feb 16, 2023 •

edited

Loading

johnlockejrr commented Feb 17, 2023 •

edited

Loading

johnlockejrr commented Mar 4, 2023 •

edited

Loading

johnlockejrr commented Mar 5, 2023 •

edited

Loading

johnlockejrr commented Mar 6, 2023 •

edited

Loading

johnlockejrr commented Apr 4, 2023 •

edited

Loading

charlesLoder commented Dec 29, 2023 •

edited

Loading

johnlockejrr commented Dec 29, 2023 via email •

edited

Loading

johnlockejrr commented Jan 17, 2024 •

edited

Loading

johnlockejrr commented Jan 18, 2024 •

edited

Loading

johnlockejrr commented Jan 18, 2024 •

edited

Loading

charlesLoder commented Jan 19, 2024 •

edited

Loading