# Code

## Converting to Ugaritic and from Ugaritic

In [236]:
# Ugaritic script mapping in a single dictionary
latin_to_ugaritic = {
    "ʾa": "𐎀",
    "ʔa": "𐎀",
    "'a": "𐎀",
    "ả": "𐎀",
    "b": "𐎁",
    "g": "𐎂",
    "ḫ": "𐎃",
    "h_": "𐎃",
    "d": "𐎄",
    "h": "𐎅",
    "w": "𐎆",
    "z": "𐎇",
    "ḥ": "𐎈",
    "h.": "𐎈",
    "ṭ": "𐎉",
    "t.": "𐎉",
    "y": "𐎊",
    "k": "𐎋",
    "š": "𐎌",
    "s^": "𐎌",
    "l": "𐎍",
    "m": "𐎎",
    "ḏ": "𐎏",
    "d_": "𐎏",
    "n": "𐎐",
    "ẓ": "𐎑",
    "z.": "𐎑",
    "s": "𐎒",
    "ʿ": "𐎓",
    "`": "𐎓",
    "p": "𐎔",
    "ṣ": "𐎕",
    "s.": "𐎕",
    "q": "𐎖",
    "r": "𐎗",
    "ṯ": "𐎘",
    "t_": "𐎘",
    "ġ": "𐎙",
    "g.": "𐎙",
    "t": "𐎚",
    "ʾi": "𐎛",
    "ʔi": "𐎛",
    "'i": "𐎛",
    "ỉ": "𐎛",
    "ʾu": "𐎜",
    "ʔu": "𐎜",
    "'u": "𐎜",
    "ủ": "𐎜",
    "s2": "𐎝",
    "s₂": "𐎝",
    "s'": "𐎝",
    "s̀": "𐎝",
    "ś": "𐎝",

    "a": "",
    "e": "",
    "i": "",
    "o": "",
    "u": "",

    "ā": "",
    "ē": "",
    "ī": "",
    "ō": "",
    "ū": "",

    "a_": "",
    "e_": "",
    "i_": "",
    "o_": "",
    "u_": "",
    "/": ""
}

ugaritic_to_latin = {
    '𐎀': 'ả',
    '𐎁': 'b',
    '𐎂': 'g',
    '𐎃': 'ḫ',
    '𐎄': 'd',
    '𐎅': 'h',
    '𐎆': 'w',
    '𐎇': 'z',
    '𐎈': 'ḥ',
    '𐎉': 'ṭ',
    '𐎊': 'y',
    '𐎋': 'k',
    '𐎌': 'š',
    '𐎍': 'l',
    '𐎎': 'm',
    '𐎏': 'ḏ',
    '𐎐': 'n',
    '𐎑': 'ẓ',
    '𐎒': 's',
    '𐎓': 'ʿ',
    '𐎔': 'p',
    '𐎕': 'ṣ',
    '𐎖': 'q',
    '𐎗': 'r',
    '𐎘': 'ṯ',
    '𐎙': 'ġ',
    '𐎚': 't',
    '𐎛': 'ỉ',
    '𐎜': 'ủ',
    '𐎝': 's̀'
 }

ugaritic_to_cal = {
    '𐎀': ')',
    '𐎁': 'b',
    '𐎂': 'g',
    '𐎃': 'x',
    '𐎄': 'd',
    '𐎅': 'h',
    '𐎆': 'w',
    '𐎇': 'z',
    '𐎈': 'x',
    '𐎉': 'T',
    '𐎊': '(',
    '𐎋': 'k',
    '𐎌': ['$', '&'],
    '𐎍': 'l',
    '𐎎': 'm',
    '𐎏': 'd',
    '𐎐': 'n',
    '𐎑': 'T',
    '𐎒': 's',
    '𐎓': '(',
    '𐎔': 'p',
    '𐎕': ['c', '('],
    '𐎖': 'q',
    '𐎗': 'r',
    '𐎘': 't',
    '𐎙': ['T', '('],
    '𐎚': 't',
    '𐎛': ')',
    '𐎜': ')',
    '𐎝': 's'
 }

# According to Huehnergard, 2012
ugaritic_to_hebrew = {
    '𐎀': ['אַ'],
    '𐎁': ['ב'],
    '𐎂': ['ג'],
    '𐎃': ['ח'],
    '𐎄': ['ד','ז'],
    '𐎅': ['ה'],
    '𐎆': ['ו'],
    '𐎇': ['ז'],
    '𐎈': ['ח'],
    '𐎉': ['ט'],
    '𐎊': ['י'],
    '𐎋': ['כ'],
    '𐎌': ['ש'],
    '𐎍': ['ל'],
    '𐎎': ['מ'],
    '𐎏': ['ז'],
    '𐎐': ['נ'],
    '𐎑': ['צ'],
    '𐎒': ['ס'],
    '𐎓': ['ע'],
    '𐎔': ['פ'],
    '𐎕': ['צ'],
    '𐎖': ['ק'],
    '𐎗': ['ר'],
    '𐎘': ['שׁ'],
    '𐎙': ['ע','צ'],
    '𐎚': ['ת'],
    '𐎛': ['א' + '\u05B4'],
    '𐎜': ['א' + '\u05BB'],
    '𐎝': ['ס']
 }

def phonetic2ugaritic(text):
    """
    Translates Latin-script text into Ugaritic script.

    Parameters:
        text (str): The input text in Latin script.

    Returns:
        str: The translated text in Ugaritic script.
    """
    result = []
    i = 0
    while i < len(text):
        match = None
        # Check for longest matching keys first
        for key in sorted(latin_to_ugaritic.keys(), key=len, reverse=True):
            if text[i:i + len(key)] == key:
                match = key
                break
        if match:
            result.append(latin_to_ugaritic[match])
            i += len(match)  # Skip processed characters
        else:
            result.append(text[i])  # Keep non-mappable characters as-is
            i += 1
    return ''.join(result)

def ugaritic2phonetic(text):
    """
    Translates Ugaritic-script text into Latin script.

    Parameters:
        text (str): The input text in Ugaritic script.

    Returns:
        str: The translated text in Latin script.
    """
    result = []
    for char in text:
        if char in ugaritic_to_latin:
            result.append(ugaritic_to_latin[char])
        else:
            result.append(char)  # Keep non-mappable characters as-is
    return ''.join(result)


def ugaritic2hebrew(text):
    """
    Translates Ugaritic-script text into Hebrew.

    Parameters:
        text (str): The input text in Ugaritic script.

    Returns:
        list: A flat list of all possible Hebrew translations.
    """

    if not text:
        return [""]

    results = []
    char = text[0]

    if char in ugaritic_to_hebrew:
        for translation in ugaritic_to_hebrew[char]:
            for rest in ugaritic2hebrew(text[1:]):
                results.append(translation + rest)
    else:
        for rest in ugaritic2hebrew(text[1:]):
          results.append(char + rest)

    return results


def ugaritic2cal(text):
    """
    Translates Ugaritic-script text into CAL search notation.

    Parameters:
        text (str): The input text in Ugaritic script.

    Returns:
        list: A flat list of all possible CAL translations.
    """

    if not text:
        return [""]

    results = []
    char = text[0]

    if char in ugaritic_to_cal:
        for translation in ugaritic_to_cal[char]:
            for rest in ugaritic2cal(text[1:]):
                results.append(translation + rest)
    else:
        for rest in ugaritic2cal(text[1:]):
          results.append(char + rest)

    return results

## Chaining Convertors

In [244]:
def chain(f, g):
    return lambda x : g(f(x)) if isinstance(f(x), str) else [g(m) for m in f(x)]

def print_transcr(orig, converter):
    print(f"{orig}  ->  {converter(orig)}")

## CAL Converters

In [172]:
# Extracted from view-source:https://cal.huc.edu/searching/fullbrowser.html
cal_table = """
<table border="2" fill="2"  width="90%">

	<tr>
		<td align="center">CAL Code</td>
		<td align="center">)</td>
		<td align="center">b</td>
		<td align="center">g</td>
		<td align="center">d</td>
		<td align="center">h</td>
		<td align="center">w</td>
		<td align="center">z</td>
		<td align="center">x</td>
		<td align="center">T</td>
		<td align="center">y</td>
		<td align="center">k</td>
		<td align="center">l</td>
		<td align="center">m</td>
		<td align="center">n</td>
		<td align="center">s</td>
		<td align="center">(</td>
		<td align="center">p</td>
		<td align="center">P</td>
		<td align="center">c</td>
		<td align="center">q</td>
		<td align="center">r</td>
		<td align="center">$</td>
		<td align="center">&</td>
		<td align="center">t</td>
		<td align="center">@</td>
	</tr>
	<tr>
		<td align="center">Unicode</td>
		<td align="center">ˀ</td>
		<td align="center">b</td>
		<td align="center">g</td>
		<td align="center">d</td>
		<td align="center">h</td>
		<td align="center">w</td>
		<td align="center">z</td>
		<td align="center">ḥ</td>
		<td align="center">ṭ</td>
		<td align="center">y</td>
		<td align="center">k</td>
		<td align="center">l</td>
		<td align="center">m</td>
		<td align="center">n</td>
		<td align="center">s</td>
		<td align="center">ˁ</td>
		<td align="center">p</td>
		<td align="center">ṗ</td>
		<td align="center">ṣ</td>
		<td align="center">q</td>
		<td align="center">r</td>
		<td align="center">š</td>
		<td align="center">ś</td>
		<td align="center">t</td>
		<td align="center">**</td>
	</tr>
	<tr>
		<td align="center">Hebrew</td>
		<td align="center">א</td>
		<td align="center">ב</td>
		<td align="center">ג</td>
		<td align="center">ד</td>
		<td align="center">ה</td>
		<td align="center">ו</td>
		<td align="center">ז</td>
		<td align="center">ח</td>
		<td align="center">ט</td>
		<td align="center">י</td>
		<td align="center">כ</td>
		<td align="center">ל</td>
		<td align="center">מ</td>
		<td align="center">נ</td>
		<td align="center">ס</td>
		<td align="center">ע</td>
		<td align="center">פ</td>
		<td align="center">&nbsp;</td>
		<td align="center">צ</td>
		<td align="center">ק</td>
		<td align="center">ר</td>
		<td align="center">ש</td>
		<td align="center">שׂ</td>
		<td align="center">ת</td>
		<td align="center">**</td>
	</tr>
	<tr>
		<td align="center">Syriac</td>
		<td align="center" class="syr">ܐ</td>
		<td align="right" class="syr">ܒ</td>
		<td align="right" class="syr">ܓ</td>
		<td align="right" class="syr">ܕ</td>
		<td align="right" class="syr">ܗ</td>
		<td align="right" class="syr">ܘ</td>
		<td align="right" class="syr">ܙ</td>
		<td align="right" class="syr">ܚ</td>
		<td align="right" class="syr">ܛ</td>
		<td align="right" class="syr">ܝ</td>
		<td align="right" class="syr">ܟ</td>
		<td align="right" class="syr">ܠ</td>
		<td align="right" class="syr">ܡ</td>
		<td align="right" class="syr">ܢ</td>
		<td align="right" class="syr">ܣ</td>
		<td align="right" class="syr">ܥ</td>
		<td align="right" class="syr">ܦ</td>
		<td align="right" class="syr">ܧ</td>
		<td align="right" class="syr">ܨ</td>
		<td align="right" class="syr">ܩ</td>
		<td align="right" class="syr">ܪ</td>
		<td align="center" class="syr">ܫ</td>
		<td align="right" class="syr">&nbsp; </td>
		<td align="center" class="syr">ܬ</td>
		<td align="center" >**</td>
	</tr>
</table>"""

In [173]:
# prompt: convert cal_table to a dataframe

import pandas as pd
from bs4 import BeautifulSoup

# Parse the HTML table using BeautifulSoup
soup = BeautifulSoup(cal_table, 'html.parser')
table = soup.find('table')

# Extract data from the table
data = []
rows = table.find_all('tr')
for row in rows:
    cols = row.find_all('td')
    cols = [ele.text.strip() for ele in cols]
    data.append([ele for ele in cols if ele])  # Get rid of empty values

# Create a Pandas DataFrame
df = pd.DataFrame(data[1:], columns=data[0]).transpose()
df.columns = df.iloc[0]
df = df.iloc[1:]
df.head()

CAL Code,Unicode,Hebrew,Syriac
),ˀ,א,ܐ
b,b,ב,ܒ
g,g,ג,ܓ
d,d,ד,ܕ
h,h,ה,ܗ


In [219]:
# prompt: Create converter functions from CAL to Unicode, Hebrew, Syriac.

def cal2unicode(cal_text):
    """Converts CAL text to Unicode."""
    unicode_mapping = dict(zip(df.index, df['Unicode']))
    result = ''
    for char in cal_text:
        result += unicode_mapping.get(char, char) # If char not found, keep original
    return result

def cal2hebrew(cal_text):
    """Converts CAL text to Hebrew."""
    hebrew_mapping = dict(zip(df.index, df['Hebrew']))
    result = ''
    for char in cal_text:
        result += hebrew_mapping.get(char, char) # If char not found, keep original
    return result

def cal2syriac(cal_text):
    """Converts CAL text to Syriac."""
    syriac_mapping = dict(zip(df.index, df['Syriac']))
    result = ''
    for char in cal_text:
        result += syriac_mapping.get(char, char)  # If char not found, keep original
    return result

In [217]:
# prompt: implement hebrew2cal() and unicode2cal() functions

def hebrew2cal(hebrew_text):
    """Converts Hebrew text to CAL code."""
    cal_mapping = dict(zip(df['Hebrew'], df.index))
    result = ''
    for char in hebrew_text:
        result += cal_mapping.get(char, char) # If char not found, keep original
    return result

def unicode2cal(unicode_text):
    """Converts Unicode text to CAL code."""
    cal_mapping = dict(zip(df['Unicode'], df.index))
    result = ''
    for char in unicode_text:
        result += cal_mapping.get(char, char) # If char not found, keep original
    return result

## CAL Search

In [215]:
# prompt: Use this URL as an example: "https://cal.huc.edu/browseSKEYheaders.php?tools=on&first3=%29%24kr". This requests unicode 'ˀškr' == CAL ')$kr', note the first3 parameter.
# Write a function that renders an iframe inside the jupyter notebook showing the result for an input Aramaic word (use unicode2cal()) returned by a request to "https://cal.huc.edu/browseSKEYheaders.php"

import requests
from IPython.display import IFrame

def display_cal(aramaic_word):
  """
  Displays an iframe showing the result of a request to the CAL website for a given Aramaic word.

  Args:
    aramaic_word: The Aramaic word to search for (Unicode).
  """
  cal_code = unicode2cal(aramaic_word)
  url = f"https://cal.huc.edu/browseSKEYheaders.php?tools=on&first3={requests.utils.quote(cal_code)}"
  iframe = IFrame(url, width=800, height=600)
  display(iframe)

## Logos Search

In [212]:
def generate_logos_links(hebrew_word):
  """Generates a hyperlink to the Logos Bible Software search for a Hebrew word.

  Args:
    hebrew_word: The Hebrew word to search for.

  Returns:
    HTML strings representing the hyperlinks.
  """

  # URL encode the Hebrew word
  encoded_word = requests.utils.quote(hebrew_word)
  # Construct the Logos URL
  base_url = "https://app.logos.com/books/"
  lex_components = {
      'David J.A. Clines DCH': 'LLS%3A46.30.12',
      'HALOT': ['LLS%3ADICHEBREW', 'he'],
      'TDOT': ['LLS%3ATDOT', 'he'],
      'BDB': ['LLS%3A46.30.16', 'he'],
      'Jastrow’s Dictionary of the Targumim, the Talmud Babli and Yerushalmi, and the Midrashic Literature': ['LLS%3AARAMAICLEX', 'arc'],
      'CAL': ['LLS%3A46.30.28', 'arc']
      }
  res = []
  for key, lex_component in lex_components.items():
    url = f"{base_url}{lex_component[0]}/headwords/{encoded_word}?headwordLanguage={lex_component[1]}&layout=one"
    # Create the HTML hyperlink
    hyperlink = f'<p><a href="{url}" target="_blank">{key}: {hebrew_word}</a></p>'
    res.append(hyperlink)
  return "".join(res)

## Wiktionary Search

In [227]:
def search_wiktionary(word):
  """Searches Wiktionary for a word and displays the result in an iframe.

  Args:
    word: The word to search for.
  """
  url = f"https://en.wiktionary.org/wiki/{word}"
  iframe = IFrame(url, width=800, height=600)
  display(iframe)

# Examles

## Converting to and from Ugaritic

In [223]:
print_transcr("𐎛𐎁", ugaritic2phonetic)
print_transcr("𐎛𐎁", ugaritic2hebrew)
print_transcr("𐎝𐎝𐎆", ugaritic2phonetic)
print_transcr("𐎝𐎝𐎆", ugaritic2hebrew)
print_transcr("𐎜𐎘𐎔𐎚", ugaritic2hebrew)
print_transcr("𐎜𐎘𐎔𐎚", ugaritic2phonetic)
print_transcr("ủṯpt", phonetic2ugaritic)
print_transcr("/ʾuṯpatu/", phonetic2ugaritic)
print_transcr("'ut_patu", phonetic2ugaritic)
print_transcr("'ut_patu", chain(phonetic2ugaritic, ugaritic2hebrew))
print_transcr("𐎄𐎈𐎍", ugaritic2phonetic)
print_transcr("𐎄𐎈𐎍", ugaritic2hebrew)
print_transcr("𐎄𐎄𐎎𐎌", ugaritic2hebrew)
print_transcr("𐎄𐎄𐎎𐎌", ugaritic2phonetic)

𐎛𐎁  ->  ỉb
𐎛𐎁  ->  ['אִב']
𐎝𐎝𐎆  ->  s̀s̀w
𐎝𐎝𐎆  ->  ['ססו']
𐎜𐎘𐎔𐎚  ->  ['אֻשׁפת']
𐎜𐎘𐎔𐎚  ->  ủṯpt
ủṯpt  ->  𐎜𐎘𐎔𐎚
/ʾuṯpatu/  ->  𐎜𐎘𐎔𐎚
'ut_patu  ->  𐎜𐎘𐎔𐎚
'ut_patu  ->  ['אֻשׁפת']
𐎄𐎈𐎍  ->  dḥl
𐎄𐎈𐎍  ->  ['דחל', 'זחל']
𐎄𐎄𐎎𐎌  ->  ['דדמש', 'דזמש', 'זדמש', 'זזמש']
𐎄𐎄𐎎𐎌  ->  ddmš


## Converting for CAL

In [240]:
# Conversion from CAL notation
print_transcr(')bg', cal2unicode)
print_transcr(')bg', cal2hebrew)
print_transcr(')bg', cal2syriac)

# Conversion to CAL notation
print_transcr('אבג', hebrew2cal)
print_transcr('ˀbg', unicode2cal)

)bg  ->  ˀbg
)bg  ->  אבג
)bg  ->  ܐܒܓ
אבג  ->  )bg
ˀbg  ->  )bg


In [249]:
# Example chaining on the conversions
print_transcr("𐎀𐎌𐎋𐎗𐎗", chain(ugaritic2cal, cal2syriac))

𐎀𐎌𐎋𐎗𐎗  ->  ['ܐܫܟܪܪ', 'ܐܬܟܪܪ']


## Logos Search

In [213]:
# Example Logos Lexicons links generator:
hebrew_word = "חמר"  # Replace with the desired Hebrew word
links = generate_logos_links(hebrew_word)

#To display the link as an actual clickable link in a Jupyter environment, you can use IPython.display.HTML
from IPython.display import HTML
HTML(links)

In [211]:
HTML(generate_logos_links(ugaritic2hebrew('𐎋𐎚𐎁')[0]))

## CAL Search

In [248]:
# Example usage
display_cal(chain(ugaritic2cal, unicode2cal)("𐎅𐎄")[0])

## Wiktionary Search

In [233]:
search_wiktionary(ugaritic2hebrew('𐎅𐎄')[0])