In [1]:
## Completed Parts: 
## 1) Tittles 
## 3) Accents
## 5) Numbers 

## In progress: 
## 4) Capture logograms within the text (e.g. ku3-babbar as KU.BABBAR)

## a) months -- e.g. law 13 line 17, 19 _iti 6(!)-kam_
## b) broken signs (#): law 28 line 30; law 30 line 51; ! -59 near the end "a-zu!"
## c) change hypen to periods

## Additional Bugs: 

## c) 3 akkadian signs between signs 62 - LUGAL_ sha i-na _kalam
## d) line 94 -- mistype of text : _ ISZ-QU2-LU A-NA _KU3-BABBER_DAM-GAR3_
## e) law 277 line 54 and 56 -- the parenthesis messess it all up
## f) e.g. 274 - all broken up
## ISSUE: Law 15

In [2]:
## Up-to-date program

"""ATF Converter"""   
## Replaces Akkadian letters, marked logograms, and determinatives. 

__author__ = ['Andrew Deloucas <adeloucas@g.harvard.com>']
__license__ = 'MIT License. See LICENSE.'
                                                        
import re

class ATFConverter(object):     
    def __init__(self):
        ## Accent Converter
        ## Row one and two: accent is on first vowel (3 phonemes)
        ## Row three: accent is on first / middle vowel (2 phonemes)
        ## Row four: accent is on last vowel (standard)
        ## Row five: accent is on last vowel (considers 'sz' and 's,/t,')
        ## known issue: Arad2
        accents =  [(r'[aA]([a-zA-Z][aeiouAEIOU][a-zA-Z][aeiouAEIOU])2', 'á\\1'), 
                    (r'[aA]([a-zA-Z][aeiouAEIOU][a-zA-Z][aeiouAEIOU])3', 'à\\1'),
                    (r'[aA]([a-zA-Z][aeiouAEIOU])2', 'á\\1'), (r'[aA]([a-zA-Z][aeiouAEIOU])3', 'à\\1'),
                    (r'[aA]2', 'á'), (r'[aA]3', 'à'), (r'[aA]([a-zA-Z])2', 'á\\1'), (r'[aA]([a-zA-Z])3', 'à\\1'), 
                    (r'[aA]([sStT])([zZ,])2', 'á\\1\\2'), (r'[aA]([sStT])([zZ,])3', 'à\\1\\2'),
            
                    (r'[eE]([a-zA-Z][aeiouAEIOU][a-zA-Z][aeiouAEIOU])2', 'é\\1'), 
                    (r'[eE]([a-zA-Z][aeiouAEIOU][a-zA-Z][aeiouAEIOU])3', 'è\\1'),
                    (r'[eE]([a-zA-Z][aeiouAEIOU])2', 'é\\1'), (r'[eE]([a-zA-Z][aeiouAEIOU])3', 'è\\1'),
                    (r'[eE]2', 'é'), (r'[eE]3', 'è'), (r'[eE]([a-zA-Z])2', 'é\\1'), (r'[eE]([a-zA-Z])3', 'è\\1'),
                    (r'[eE]([sStT])([zZ,])2', 'é\\1\\2'), (r'[eE]([sStT])([zZ,])3', 'è\\1\\2'),
                    
                    (r'[iI]([a-zA-Z][aeiouAEIOU][a-zA-Z][aeiouAEIOU])2', 'í\\1'), 
                    (r'[iI]([a-zA-Z][aeiouAEIOU][a-zA-Z][aeiouAEIOU])3', 'ì\\1'),
                    (r'[iI]([a-zA-Z][aeiouAEIOU])2', 'í\\1'), (r'[iI]([a-zA-Z][aeiouAEIOU])3', 'ì\\1'),
                    (r'[iI]2', 'í'), (r'[iI]3', 'ì'), (r'[iI]([a-zA-Z])2', 'í\\1'), (r'[iI]([a-zA-Z])3', 'ì\\1'),
                    (r'[iI]([sStT])([zZ,])2', 'í\\1\\2'), (r'[iI]([sStT])([zZ,])3', 'ì\\1\\2'),
                    
                    (r'[oO]([a-zA-Z][aeiouAEIOU][a-zA-Z][aeiouAEIOU])2', 'ó\\1'), 
                    (r'[oO]([a-zA-Z][aeiouAEIOU][a-zA-Z][aeiouAEIOU])3', 'ò\\1'),
                    (r'[oO]([a-zA-Z][aeiouAEIOU])2', 'ó\\1'), (r'[oO]([a-zA-Z][aeiouAEIOU])3', 'ò\\1'),
                    (r'[oO]2', 'ó'), (r'[oO]3', 'ò'), (r'[oO]([a-zA-Z])2', 'ó\\1'), (r'[oO]([a-zA-Z])3', 'ò\\1'),
                    (r'[oO]([sStT])([zZ,])2', 'ó\\1\\2'), (r'[oO]([sStT])([zZ,])3', 'ò\\1\\2'),
                    
                    (r'[uU]([a-zA-Z][aeiouAEIOU][a-zA-Z][aeiouAEIOU])2', 'ú\\1'), 
                    (r'[uU]([a-zA-Z][aeiouAEIOU][a-zA-Z][aeiouAEIOU])3', 'ù\\1'),
                    (r'[uU]([a-zA-Z][aeiouAEIOU])2', 'ú\\1'), (r'[uU]([a-zA-Z][aeiouAEIOU])3', 'ù\\1'),
                    (r'[uU]2', 'ú'), (r'[uU]3', 'ù'), (r'[uU]([a-zA-Z])2', 'ú\\1'), (r'[uU]([a-zA-Z])3', 'ù\\1'),
                    (r'[uU]([sStT])([zZ,])2', 'ú\\1\\2'), (r'[uU]([sStT])([zZ,])3', 'ù\\1\\2'),
                    
        ## this subscripts numbers after 2 and 3; CHECK IF ANY SIGNS GO HIGHER THAN 18.
                    (r'([a-zA-Z])4', '\\1₄'), (r'([a-zA-Z])5', '\\1₅'), (r'([a-zA-Z])6', '\\1₆'), 
                    (r'([a-zA-Z])7', '\\1₇'), (r'([a-zA-Z])8', '\\1₈'), (r'([a-zA-Z])9', '\\1₉'), 
                    (r'([a-zA-Z])10', '\\1₁₀'), (r'([a-zA-Z])11', '\\1₁₁'), (r'([a-zA-Z])12', '\\1₁₂'), 
                    (r'([a-zA-Z])13', '\\1₁₃'), (r'([a-zA-Z])14', '\\1₁₄'), (r'([a-zA-Z])15', '\\1₁₅'), 
                    (r'([a-zA-Z])16', '\\1₁₆'),(r'([a-zA-Z])17', '\\1₁₇'), (r'([a-zA-Z])18', '\\1₁₈')]
        
        ## determinative converter; check ARM1 for logograms
        tittles =  [(r's,', 'ṣ'), (r'sz', 'š'), (r't,', 'ṭ'), (r'S,', 'Ṣ'), (r'SZ', 'Š'), (r'T,', 'Ṭ'),
                    (r'{d}', 'ᵈ'), (r'{diš}', '𒁹'), (r'{geš}', 'ᵍᵉˢᶻ'), (r'{i7}', 'ⁱ⁷'), (r'{i₇}', 'ⁱ⁷'), 
                    (r'{iri}', 'ⁱʳⁱ'), (r'{ki}', 'ᵏⁱ'), (r'{kuš}', 'ᵏᶸˢᶻ'), (r'{lu2}', 'ˡᶸ²'), 
                    (r'{lú}', 'ˡᶸ²'), (r'{munus}', 'ᵐᶸⁿᶸˢ'), (r'{še}', 'ˢᶻᵉ'), (r'{uzu}', 'ᶸᶻᶸ'),
                    (r'\(u\)', '(𒌋)'), (r'\(diš\)', '(𒁹)')]
        
        self.accents = \
            [(re.compile(regex), repl) for (regex, repl) in accents]
        self.tittles = \
            [(re.compile(regex), repl) for (regex, repl) in tittles]

    
    def transliterate(self, text):
        # gives signs accents & subscript numbers #
        for (pattern, repl) in self.accents:
            text = re.subn(pattern, repl, text)[0]
       
        # crosses t's & dots i's #
        for (pattern, repl) in self.tittles:
            text = re.subn(pattern, repl, text)[0]
            
        # capitalizes sumerian by finding instances of underscores#
        for x in re.finditer(r'\_\w*\_', text):
            text = re.sub(r'\_\w*\_', lambda x: x.group(0).upper(), text)
        for y in re.finditer(r'[\_]\w*[\s-]\w*[\_]', text):
            text = re.sub(r'[\_]\w*[\s-]\w*[\_]', lambda y: y.group(0).upper(), text)
        for c in re.finditer(r'[\_]\w*[\s-]\w*[\s-]\w*[\_]', text):                    
            text = re.sub(r'[\_]\w*[\s-]\w*[\s-]\w*[\_]', lambda c: c.group(0).upper(), text)
        for d in re.finditer(r'[\_]\w*[\s-]\w*[\s-]\w*[\s-]\w*[\_]', text):                    
            text = re.sub(r'[\_]\w*[\s-]\w*[\s-]\w*[\s-]\w*\_]', lambda d: d.group(0).upper(), text)
        for e in re.finditer(r'[\_]\w*[\s-]\w*[\s-]\w*[\s-]\w*[\s-]\w*[\_]', text):                    
            text = re.sub(r'[\_]\w*[\s-]\w*[\s-]\w*[\s-]\w*[\s-]\w*[\_]', lambda e: e.group(0).upper(), text)
        for f in re.finditer(r'[\_]\w*[\s-]\w*[\s-]\w*[\s-]\w*[\s-]\w*[\s-]\w*[\_]', text):                    
            text = re.sub(r'[\_]\w*[\s-]\w*[\s-]\w*[\s-]\w*[\s-]\w*[\s-]\w*[\_]', lambda f: f.group(0).upper(), text)
        for g in re.finditer(r'[\_]\w*[\s-]\w*[\s-]\w*[\s-]\w*[\s-]\w*[\s-]\w*[\s-]\w*[\_]', text):                    
            text = re.sub(r'[\_]\w*[\s-]\w*[\s-]\w*[\s-]\w*[\s-]\w*[\s-]\w*[\s-]\w*[\_]', lambda c: g.group(0).upper(), text)
        #lowercases any akkadian found in the above section #
        for cc in re.finditer(r'\_[\s-]\w*[\s-]\_', text):
            text = re.sub(r'\_[\s-]\w*[\s-]\_', lambda cc: cc.group(0).lower(), text)
        for dd in re.finditer(r'\_[\s-]\w*[\s-]\w*[\s-]\_', text):
            text = re.sub(r'\_[\s-]\w*[\s-]\w*[\s-]\_', lambda dd: dd.group(0).lower(), text)
        for ee in re.finditer(r'\_[\s-]\w*[\s-]\w*[\s-]\w*[\s-]\_', text):
            text = re.sub(r'\_[\s-]\w*[\s-]\w*[\s-]\w*[\s-]\_', lambda ee: ee.group(0).lower(), text)
        #removes logogram markers -- keep off until above is checked and balanced.
            #text = re.sub(r'_', '', text)
        return text   

In [3]:
#Up-to-date Testing Pad

f = open("Akkadiham.txt", 'r')
original = f.read()

test = ATFConverter()
hammurabi_text = test.transliterate(original)

print(hammurabi_text)

Primary publication: RIME 4.03.06.add21, ex. 01
Author(s): Frayne, Douglas R.
Publication date: 1990
Secondary publication(s): Bergmann, Eugen, Codex Hammurapi
Collection: Louvre Museum, Pariṣ France
Museum no.: Sb 00008
Accession no.: 
Provenience: Susa (mod. Shush)
Excavation no.: 
Period: Old Babylonian (ca. 1900-1600 BC)
Dates referenced: Hammurapi.00.00.00
Object type: other (see object remarks)
Remarks: stele
Material: stone: basalt
Language: Akkadian
Genre: Royal/Monumental
Sub-genre: witness
CDLI comments: 
Catalogue source: 20041012 cdliadmin
ATF source: Roth, Martha
Translation: 
UCLA Library ARK: 21198/zz001t₈p₉j
Composite no.: Q006387
Seal no.: 
CDLI no.: P249253
Transliteration:
&P249253 = RIME 4.03.06.add21, ex. 01 
#atf: lang akk 
@object stele 
@surface a 
@column 1 
@prologue 
1. ì-nu an ṣi-ru-um 
2. _LUGAL_ ᵈa-nun-na-ki 
3. ᵈen-líl 
4. be-el ša-me-e 
5. ù er-ṣe-tim 
6. ša-i-im 
7. ši-ma-at _KALAM_ 
8. a-na ᵈmarduk 
9. _DUMU_ re-eš-ti-im 
10. ša ᵈen-ki 
11. ᵈen-líl-ut 

In [4]:
#~`,.`*`.,`~#~`,.`*`.,`~#~`,.`*`.,`~#~`,.`*`.,`~#~`,.`TESTING AREA`.,`~#~`,.`*`.,`~#~`,.`*`.,`~#~`,.`*`.,`~#~`,.`*`.,`~#~`,.`*
"""

In progress: 
Capture logograms within the text (e.g. ku3-babbar as KU.BABBAR)

a) months -- e.g. law 13 line 17, 19 _iti 6(!)-kam_
b) broken signs (#): law 28 line 30; law 30 line 51; ! -59 near the end "a-zu!"
c) change hypen to periods

## Additional Bugs: ##

c) 3 akkadian signs between signs 62 - LUGAL_ sha i-na _kalam
d) line 94 -- mistype of text : _ ISZ-QU2-LU A-NA _KU3-BABBER_DAM-GAR3_
e) law 277 line 54 and 56 -- the parenthesis messess it all up
f) e.g. 274 - all broken up
g) Law 15

"""
#~`,.`*`.,`~#~`,.`*`.,`~#~`,.`*`.,`~#~`,.`*`.,`~#~`,.`TESTING AREA`.,`~#~`,.`*`.,`~#~`,.`*`.,`~#~`,.`*`.,`~#~`,.`*`.,`~#~`,.`*

'\n\n'

In [3]:
import re

class ATFConvertertest(object):     
    def __init__(self):
        ## Accent Converter
        ## Row one and two: accent is on first vowel (3 phonemes)
        ## Row three: accent is on first / middle vowel (2 phonemes)
        ## Row four: accent is on last vowel (standard)
        ## Row five: accent is on last vowel (considers 'sz' and 's,/t,')
        ## known issue: Arad2
        accents =  [(r'[aA]([a-zA-Z][aeiouAEIOU][a-zA-Z][aeiouAEIOU])2', 'á\\1'), 
                    (r'[aA]([a-zA-Z][aeiouAEIOU][a-zA-Z][aeiouAEIOU])3', 'à\\1'),
                    (r'[aA]([a-zA-Z][aeiouAEIOU])2', 'á\\1'), (r'[aA]([a-zA-Z][aeiouAEIOU])3', 'à\\1'),
                    (r'[aA]2', 'á'), (r'[aA]3', 'à'), (r'[aA]([a-zA-Z])2', 'á\\1'), (r'[aA]([a-zA-Z])3', 'à\\1'), 
                    (r'[aA]([sStT])([zZ,])2', 'á\\1\\2'), (r'[aA]([sStT])([zZ,])3', 'à\\1\\2'),
            
                    (r'[eE]([a-zA-Z][aeiouAEIOU][a-zA-Z][aeiouAEIOU])2', 'é\\1'), 
                    (r'[eE]([a-zA-Z][aeiouAEIOU][a-zA-Z][aeiouAEIOU])3', 'è\\1'),
                    (r'[eE]([a-zA-Z][aeiouAEIOU])2', 'é\\1'), (r'[eE]([a-zA-Z][aeiouAEIOU])3', 'è\\1'),
                    (r'[eE]2', 'é'), (r'[eE]3', 'è'), (r'[eE]([a-zA-Z])2', 'é\\1'), (r'[eE]([a-zA-Z])3', 'è\\1'),
                    (r'[eE]([sStT])([zZ,])2', 'é\\1\\2'), (r'[eE]([sStT])([zZ,])3', 'è\\1\\2'),
                    
                    (r'[iI]([a-zA-Z][aeiouAEIOU][a-zA-Z][aeiouAEIOU])2', 'í\\1'), 
                    (r'[iI]([a-zA-Z][aeiouAEIOU][a-zA-Z][aeiouAEIOU])3', 'ì\\1'),
                    (r'[iI]([a-zA-Z][aeiouAEIOU])2', 'í\\1'), (r'[iI]([a-zA-Z][aeiouAEIOU])3', 'ì\\1'),
                    (r'[iI]2', 'í'), (r'[iI]3', 'ì'), (r'[iI]([a-zA-Z])2', 'í\\1'), (r'[iI]([a-zA-Z])3', 'ì\\1'),
                    (r'[iI]([sStT])([zZ,])2', 'í\\1\\2'), (r'[iI]([sStT])([zZ,])3', 'ì\\1\\2'),
                    
                    (r'[oO]([a-zA-Z][aeiouAEIOU][a-zA-Z][aeiouAEIOU])2', 'ó\\1'), 
                    (r'[oO]([a-zA-Z][aeiouAEIOU][a-zA-Z][aeiouAEIOU])3', 'ò\\1'),
                    (r'[oO]([a-zA-Z][aeiouAEIOU])2', 'ó\\1'), (r'[oO]([a-zA-Z][aeiouAEIOU])3', 'ò\\1'),
                    (r'[oO]2', 'ó'), (r'[oO]3', 'ò'), (r'[oO]([a-zA-Z])2', 'ó\\1'), (r'[oO]([a-zA-Z])3', 'ò\\1'),
                    (r'[oO]([sStT])([zZ,])2', 'ó\\1\\2'), (r'[oO]([sStT])([zZ,])3', 'ò\\1\\2'),
                    
                    (r'[uU]([a-zA-Z][aeiouAEIOU][a-zA-Z][aeiouAEIOU])2', 'ú\\1'), 
                    (r'[uU]([a-zA-Z][aeiouAEIOU][a-zA-Z][aeiouAEIOU])3', 'ù\\1'),
                    (r'[uU]([a-zA-Z][aeiouAEIOU])2', 'ú\\1'), (r'[uU]([a-zA-Z][aeiouAEIOU])3', 'ù\\1'),
                    (r'[uU]2', 'ú'), (r'[uU]3', 'ù'), (r'[uU]([a-zA-Z])2', 'ú\\1'), (r'[uU]([a-zA-Z])3', 'ù\\1'),
                    (r'[uU]([sStT])([zZ,])2', 'ú\\1\\2'), (r'[uU]([sStT])([zZ,])3', 'ù\\1\\2'),
                    
        ## this subscripts numbers after 2 and 3; CHECK IF ANY SIGNS GO HIGHER THAN 18.
                    (r'([a-zA-Z])4', '\\1₄'), (r'([a-zA-Z])5', '\\1₅'), (r'([a-zA-Z])6', '\\1₆'), 
                    (r'([a-zA-Z])7', '\\1₇'), (r'([a-zA-Z])8', '\\1₈'), (r'([a-zA-Z])9', '\\1₉'), 
                    (r'([a-zA-Z])10', '\\1₁₀'), (r'([a-zA-Z])11', '\\1₁₁'), (r'([a-zA-Z])12', '\\1₁₂'), 
                    (r'([a-zA-Z])13', '\\1₁₃'), (r'([a-zA-Z])14', '\\1₁₄'), (r'([a-zA-Z])15', '\\1₁₅'), 
                    (r'([a-zA-Z])16', '\\1₁₆'),(r'([a-zA-Z])17', '\\1₁₇'), (r'([a-zA-Z])18', '\\1₁₈')]
        
        ## determinative converter; check ARM1 for logograms
        tittles =  [(r's,', 'ṣ'), (r'sz', 'š'), (r't,', 'ṭ'), (r'S,', 'Ṣ'), (r'SZ', 'Š'), (r'T,', 'Ṭ'),
                    (r'{d}', 'ᵈ'), (r'{diš}', '𒁹'), (r'{geš}', 'ᵍᵉˢᶻ'), (r'{i7}', 'ⁱ⁷'), (r'{i₇}', 'ⁱ⁷'), 
                    (r'{iri}', 'ⁱʳⁱ'), (r'{ki}', 'ᵏⁱ'), (r'{kuš}', 'ᵏᶸˢᶻ'), (r'{lu2}', 'ˡᶸ²'), 
                    (r'{lú}', 'ˡᶸ²'), (r'{munus}', 'ᵐᶸⁿᶸˢ'), (r'{še}', 'ˢᶻᵉ'), (r'{uzu}', 'ᶸᶻᶸ'),
                    (r'\(u\)', '(𒌋)'), (r'\(diš\)', '(𒁹)')]
        
        self.accents = \
            [(re.compile(regex), repl) for (regex, repl) in accents]
        self.tittles = \
            [(re.compile(regex), repl) for (regex, repl) in tittles]

    
    def testing(self, text):
        # gives signs accents & subscript numbers #
        for (pattern, repl) in self.accents:
            text = re.subn(pattern, repl, text)[0]
       
        # crosses t's & dots i's #
        for (pattern, repl) in self.tittles:
            text = re.subn(pattern, repl, text)[0]
            
        # capitalizes sumerian by finding instances of underscores#
        for x in re.finditer(r'\_\w*\_', text):
            text = re.sub(r'\_\w*\_', lambda x: x.group(0).upper(), text)
        for y in re.finditer(r'[\_]\w*[\s-]\w*[\_]', text):
            text = re.sub(r'[\_]\w*[\s-]\w*[\_]', lambda y: y.group(0).upper(), text)
        for c in re.finditer(r'[\_]\w*[\s-]\w*[\s-]\w*[\_]', text):                    
            text = re.sub(r'[\_]\w*[\s-]\w*[\s-]\w*[\_]', lambda c: c.group(0).upper(), text)
        for d in re.finditer(r'[\_]\w*[\s-]\w*[\s-]\w*[\s-]\w*[\_]', text):                    
            text = re.sub(r'[\_]\w*[\s-]\w*[\s-]\w*[\s-]\w*\_]', lambda d: d.group(0).upper(), text)
        for e in re.finditer(r'[\_]\w*[\s-]\w*[\s-]\w*[\s-]\w*[\s-]\w*[\_]', text):                    
            text = re.sub(r'[\_]\w*[\s-]\w*[\s-]\w*[\s-]\w*[\s-]\w*[\_]', lambda e: e.group(0).upper(), text)
        for f in re.finditer(r'[\_]\w*[\s-]\w*[\s-]\w*[\s-]\w*[\s-]\w*[\s-]\w*[\_]', text):                    
            text = re.sub(r'[\_]\w*[\s-]\w*[\s-]\w*[\s-]\w*[\s-]\w*[\s-]\w*[\_]', lambda f: f.group(0).upper(), text)
        for g in re.finditer(r'[\_]\w*[\s-]\w*[\s-]\w*[\s-]\w*[\s-]\w*[\s-]\w*[\s-]\w*[\_]', text):                    
            text = re.sub(r'[\_]\w*[\s-]\w*[\s-]\w*[\s-]\w*[\s-]\w*[\s-]\w*[\s-]\w*[\_]', lambda c: g.group(0).upper(), text)
            
        #lowercases any akkadian found in the above section #
        for cc in re.finditer(r'\_[\s-]\w*[\s-]\_', text):
            text = re.sub(r'\_[\s-]\w*[\s-]\_', lambda cc: cc.group(0).lower(), text)
        for dd in re.finditer(r'\_[\s-]\w*[\s-]\w*[\s-]\_', text):
            text = re.sub(r'\_[\s-]\w*[\s-]\w*[\s-]\_', lambda dd: dd.group(0).lower(), text)
        for ee in re.finditer(r'\_[\s-]\w*[\s-]\w*[\s-]\w*[\s-]\_', text):
            text = re.sub(r'\_[\s-]\w*[\s-]\w*[\s-]\w*[\s-]\_', lambda ee: ee.group(0).lower(), text)
        #removes logogram markers and replaces hypens for periods -- keep off until above is checked and balanced.
            
            #text = re.sub(r'_', '', text)
        return text   

In [4]:
"""Testing Pad"""

f = open("Akkadiham.txt", 'r')
original = f.read()

test = ATFConvertertest()
hammurabi_text = test.testing(original)

print(hammurabi_text)

Primary publication: RIME 4.03.06.add21, ex. 01
Author(s): Frayne, Douglas R.
Publication date: 1990
Secondary publication(s): Bergmann, Eugen, Codex Hammurapi
Collection: Louvre Museum, Pariṣ France
Museum no.: Sb 00008
Accession no.: 
Provenience: Susa (mod. Shush)
Excavation no.: 
Period: Old Babylonian (ca. 1900.1600 BC)
Dates referenced: Hammurapi.00.00.00
Object type: other (see object remarks)
Remarks: stele
Material: stone: basalt
Language: Akkadian
Genre: Royal/Monumental
Sub.genre: witness
CDLI comments: 
Catalogue source: 20041012 cdliadmin
ATF source: Roth, Martha
Translation: 
UCLA Library ARK: 21198/zz001t₈p₉j
Composite no.: Q006387
Seal no.: 
CDLI no.: P249253
Transliteration:
&P249253 = RIME 4.03.06.add21, ex. 01 
#atf: lang akk 
@object stele 
@surface a 
@column 1 
@prologue 
1. ì.nu an ṣi.ru.um 
2. _LUGAL_ ᵈa.nun.na.ki 
3. ᵈen.líl 
4. be.el ša.me.e 
5. ù er.ṣe.tim 
6. ša.i.im 
7. ši.ma.at _KALAM_ 
8. a.na ᵈmarduk 
9. _DUMU_ re.eš.ti.im 
10. ša ᵈen.ki 
11. ᵈen.líl.ut 

In [7]:
"""
next steps:

a) months -- e.g. law 13 line 17, 19 _iti 6(sign)-kam_

(r'[iI][tT][iI]\s' ... ? -- I'd rather be able to just say "between ITI and -KAM, capitalize the string)

b) broken signs (#): law 28 line 30; law 30 line 51; ! -59 near the end "a-zu!"

Haven't looked into a solution yet

c) change hypen to periods

Issue: don't want it to affect the akkadian

## Additional Bugs: ##

c) 3 akkadian signs between signs 62 - LUGAL_ sha i-na _kalam
d) line 94 -- mistype of text -> shouldn't be between underscores : _ ISZ-QU2-LU A-NA _KU3-BABBER_DAM-GAR3 _
e) law 277 line 54 and 56 -- the parenthesis messes it all up
f) e.g. 274 - all broken up
g) Law 15

--

SyntaxError: EOF while scanning triple-quoted string literal (<ipython-input-7-209a8bc982ca>, line 3)