In [196]:
import json
import codecs
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from collections import Counter
from sklearn.cluster import KMeans
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import silhouette_score
pd.options.display.max_rows = 200
#pd.reset_option('display')

<h2>Gathering the Data</h2>
<p>In this initial section, we will gather all of the cuneiform sign transliterations from the JSON files in our dataset. Then we will consolidate them into a data frame and add other columns to help our computations.</p>

<p>First, we will load a map from sign value to sign name to use on the signs in our texts. The OGSL is... (website...)</p>

In [2]:
file_ogsl = codecs.open('ogsl-sl.json','r','utf-8')
ogsl = json.load(file_ogsl)
sign_index = ogsl['index']

<h3>Collect the Text Signs</h3>
<p>The following code parses the JSON files of the ORACC texts and collects each sign transliteration. Since different signs have different types of reading, they are rendered differently in the JSON file and we must take care to recognize each sign reading type in the JSON file</p>
The types of signs and their representation in the JSON Files:
<ol>
    <li>Syllable - The reading of a sign as a syllable is rendered with a 'v' key</li>
    <li>Logogram - The reading of a sign as a logogram, i.e. one represents a word in itself or as part of a complex of signs that represents a single word is written in capital letters and with a 's' key</li>
    <li>Numerical - A sign representing a number (or personal name determinative) has an extra key called 'sexified'. This gives information on the number sign's wedge structure.</li>
</ol>

In addition, a modified sign can be any of the three types above, but written with a nonstandard paleography (e.g. a diagonal wedge is incised in the clay instead of a horizontal). These are the signs we want to examine. They have extra data given under the 'mods' key.

In [3]:
def process_signs(sign_data):
    sign_info = {}
    if 'v' in sign_data: #This is the label for a standard syllable
        sign_info['b'] = sign_data['v']
    if 's' in sign_data: #This is the label for elements of a logogram
        sign_info['b'] = sign_data['s']
    if 'n' in sign_data:
        sign_info['b'] = sign_data.get('sexified',sign_data.get('form','noform?'))
    if 'mods' in sign_data:
        for m in sign_data['mods']:
            for d in m:
                sign_info[d] = m[d]
    if 'break' in sign_data:
        sign_info['break'] = sign_data['break']
    sign_info['sign_loc_id'] = sign_data.get('id','no-id')
    return sign_info    

In [116]:
def parse_corpus(corpus_name):
    types = set()
    all_signs = []
    all_words = []
    for fname in os.listdir(corpus_name + '/corpusjson'):
        f = codecs.open(corpus_name + '/corpusjson/'+fname,'r','utf-8')
        try:
            j = json.load(f)
        except ValueError:
            print('Could not load: ' + fname)
            continue
        text_id = j['textid']
        for a in j['cdl'][0]['cdl']:
            if a.get('type','') == 'discourse':
                for b in a['cdl']:
                    if b.get('type','') == 'sentence':
                        line_label = ''
                        word_count = 0
                        for c in b['cdl']:
                            if c.get('node','') == 'd': #This is the label for the line e.g. "o ii 3"
                                line_label = c.get('label','nolabel')
                            if c.get('node','') == 'l': #This is the label for a regular word in a line
                                if c.get('tail-sig','') != '': #An extra word??
                                    continue
                                word_count += 1
                                form = c['f']['form']
                                frag = c['frag']
                                ref  = c['ref']
                                cf   = c['f'].get('cf','no-cf')
                                gw   = c['f'].get('gw','no-gw')
                                pos  = c['f'].get('pos','no-pos')
                                sense = c['f'].get('sense','no-sense')
                                norm = c['f'].get('norm','no-norm')
                                epos = c['f'].get('epos','no-epos')
                                word_sign_tot = len(c['f']['gdl'])
                                word_info = {'file':fname,'line_label':line_label,'form': form,'frag': frag, 'text_id': text_id,
                                             'ref': ref,'cf': cf,'gw': gw,'pos': pos,'epos':epos,
                                             'sense':sense,'word_sign_tot':word_sign_tot,'norm':norm,'word_num': word_count}
                                all_words.append(word_info)
                                for sign_data in c['f']['gdl']:
                                    if sign_data.get('det','') == 'semantic':
                                        for sd in sign_data['seq']:
                                            if sd.get('gg','') == 'logo':
                                                for g in sd['group']:
                                                    sign_info = process_signs(g)
                                                    sign_info.update(word_info)
                                                    all_signs.append(sign_info)
                                            else:
                                                sign_info = process_signs(sd)
                                                sign_info.update(word_info)
                                                all_signs.append(sign_info)
                                    elif sign_data.get('gg','') == 'logo':
                                        for g in sign_data['group']:
                                            if g.get('det','') == 'semantic':
                                                for sd in g['seq']:
                                                    if sd.get('gg','') == 'logo':
                                                        for gg in sd['group']:
                                                            sign_info = process_signs(gg)
                                                            sign_info.update(word_info)
                                                            all_signs.append(sign_info)                                       
                                                    else:
                                                        sign_info = process_signs(sd)
                                                        sign_info.update(word_info)
                                                        all_signs.append(sign_info)                                       
                                            else:
                                                sign_info = process_signs(g)
                                                sign_info.update(word_info)
                                                all_signs.append(sign_info)                                        
                                    else:
                                        sign_info = process_signs(sign_data)
                                        sign_info.update(word_info)
                                        all_signs.append(sign_info)
                            '''
                            if c.get('node','') == 'c': #This is the label for a phrase. This seems to no longer be used 
                                for d in c['cdl']:
                                    if d.get('node','') == 'l':
                                        form = d['f']['form']
                                        for sign_data in d['f']['gdl']:
                                            if sign_data.get('det','') == 'semantic':
                                                for sd in sign_data['seq']:
                                                    sign_info = process_signs(sd)
                                                    sign_info.update({'file':fname,'line_label':line_label,'form': form})
                                                    all_signs.append(sign_info)
                                            else:
                                                sign_info = process_signs(sign_data)
                                                sign_info.update({'file':fname,'line_label':line_label,'form': form})
                                                all_signs.append(sign_info)
                            '''
                            #types.add(c.get('type','no type'))

    print('done')
    return all_signs,all_words

In [109]:
sargon_signs,sargon_words = parse_corpus('sargonletters')

Could not load: P314095.json
done


In [110]:
df_sargon = pd.DataFrame(sargon_signs)
df_sargon = df_sargon.fillna('')
df_sargon

Unnamed: 0,a,b,break,cf,epos,f,file,form,frag,gw,line_label,m,norm,pos,ref,sense,sign_loc_id,text_id,word_num,word_sign_tot
0,,a,damaged,awātu,N,,P224485.json,a-bat,⸢a⸣-bat,word,o 1,,abat,N,P224485.2.1,word,P224485.2.1.0,P224485,1,2
1,,bat,,awātu,N,,P224485.json,a-bat,⸢a⸣-bat,word,o 1,,abat,N,P224485.2.1,word,P224485.2.1.1,P224485,1,2
2,,LUGAL,,šarru,N,,P224485.json,LUGAL,LUGAL,king,o 1,,šarri,N,P224485.2.2,king,P224485.2.2.0,P224485,2,1
3,,a,,ana,PRP,,P224485.json,a-na,a-na\t,to,o 1,,ana,PRP,P224485.2.3,to,P224485.2.3.0,P224485,3,2
4,,na,,ana,PRP,t,P224485.json,a-na,a-na\t,to,o 1,,ana,PRP,P224485.2.3,to,P224485.2.3.1,P224485,3,2
5,,1(diš),,Aššur-šarru-uṣur,PN,,P224485.json,{1}aš-šur-MAN-PAB,{1}aš-šur-MAN-⸢PAB,1,o 1,,Aššur-šarru-uṣur,PN,P224485.2.4,1,P224485.2.4.0,P224485,4,4
6,,aš,,Aššur-šarru-uṣur,PN,,P224485.json,{1}aš-šur-MAN-PAB,{1}aš-šur-MAN-⸢PAB,1,o 1,,Aššur-šarru-uṣur,PN,P224485.2.4,1,P224485.2.4.1,P224485,4,4
7,,šur,,Aššur-šarru-uṣur,PN,,P224485.json,{1}aš-šur-MAN-PAB,{1}aš-šur-MAN-⸢PAB,1,o 1,,Aššur-šarru-uṣur,PN,P224485.2.4,1,P224485.2.4.2,P224485,4,4
8,,MAN,,Aššur-šarru-uṣur,PN,,P224485.json,{1}aš-šur-MAN-PAB,{1}aš-šur-MAN-⸢PAB,1,o 1,,Aššur-šarru-uṣur,PN,P224485.2.4,1,P224485.2.4.3,P224485,4,4
9,,PAB,damaged,Aššur-šarru-uṣur,PN,,P224485.json,{1}aš-šur-MAN-PAB,{1}aš-šur-MAN-⸢PAB,1,o 1,,Aššur-šarru-uṣur,PN,P224485.2.4,1,P224485.2.4.4,P224485,4,4


Now, we form our Data Frame where each row contains information on every sign in the corpus. Further limitations on which signs are significant to our purposes will be made later, but for now we will eliminate all of the signs which are labelled as "missing," (i.e. reconstructed) because any information based on their paleography or orthography cannot be ascertained.

In [99]:
df = pd.DataFrame(all_signs)
df = df.fillna('')
df

Unnamed: 0,a,b,break,cf,epos,f,file,form,frag,gw,line_label,m,norm,pos,ref,sense,sign_loc_id,text_id,word_num,word_sign_tot
0,,a,damaged,awātu,N,,P224485.json,a-bat,⸢a⸣-bat,word,o 1,,abat,N,P224485.2.1,word,P224485.2.1.0,P224485,1,2
1,,bat,,awātu,N,,P224485.json,a-bat,⸢a⸣-bat,word,o 1,,abat,N,P224485.2.1,word,P224485.2.1.1,P224485,1,2
2,,LUGAL,,šarru,N,,P224485.json,LUGAL,LUGAL,king,o 1,,šarri,N,P224485.2.2,king,P224485.2.2.0,P224485,2,1
3,,a,,ana,PRP,,P224485.json,a-na,a-na\t,to,o 1,,ana,PRP,P224485.2.3,to,P224485.2.3.0,P224485,3,2
4,,na,,ana,PRP,t,P224485.json,a-na,a-na\t,to,o 1,,ana,PRP,P224485.2.3,to,P224485.2.3.1,P224485,3,2
5,,1(diš),,Aššur-šarru-uṣur,PN,,P224485.json,{1}aš-šur-MAN-PAB,{1}aš-šur-MAN-⸢PAB,1,o 1,,Aššur-šarru-uṣur,PN,P224485.2.4,1,P224485.2.4.0,P224485,4,4
6,,aš,,Aššur-šarru-uṣur,PN,,P224485.json,{1}aš-šur-MAN-PAB,{1}aš-šur-MAN-⸢PAB,1,o 1,,Aššur-šarru-uṣur,PN,P224485.2.4,1,P224485.2.4.1,P224485,4,4
7,,šur,,Aššur-šarru-uṣur,PN,,P224485.json,{1}aš-šur-MAN-PAB,{1}aš-šur-MAN-⸢PAB,1,o 1,,Aššur-šarru-uṣur,PN,P224485.2.4,1,P224485.2.4.2,P224485,4,4
8,,MAN,,Aššur-šarru-uṣur,PN,,P224485.json,{1}aš-šur-MAN-PAB,{1}aš-šur-MAN-⸢PAB,1,o 1,,Aššur-šarru-uṣur,PN,P224485.2.4,1,P224485.2.4.3,P224485,4,4
9,,PAB,damaged,Aššur-šarru-uṣur,PN,,P224485.json,{1}aš-šur-MAN-PAB,{1}aš-šur-MAN-⸢PAB,1,o 1,,Aššur-šarru-uṣur,PN,P224485.2.4,1,P224485.2.4.4,P224485,4,4


<h3>Setting Up the Data for Clustering</h3>
<p>The general goal is to assign a vector to each text that reflects the usage of variant orthography and paleography.</p>
<ol>
    <li>Paleography - Any one set of wedges that we classify as a sign can be impressed on the clay in different ways. For example, a wedge can be missing or one can be added. Also, the tilt of a wedge can variable. These are the features we want to examine in order to see if one text prefers one sign writing or another.</li>
    <li>Orthography - Due to the homophony of the cuneiform writing system, one syllable can be written with many signs. For example, 'li' can be written with the LI-sign but also with the NI-sign, in which case it would be transliterated as li<sub>2</sub></li>
    <li>Orthography (Words) - In addition to syllables being written by variant sign forms, a word (with appropriate inflection) can be written in different ways similar to how "color" is written "colour" in British English. These two variant orthographies indicate the same word but their variation might indicate something about the author.</li>
</ol>

<p>This section therefore contains three subsections. One groups the diagnostic signs with or without modifications per text to. The next discovers the homophonous signs used throughout the corpus and groups different usages per text. The third groups the word forms in the corpus.</p>

First of all, let's create more columns in the data frame to aid us
<ol>
<li>sign_form - Using the OGSL mapping we created earlier, we can assign each reading of a sign to its sign form. This will help us combine different readings under the same sign form to help us mark variations in paleography</li>
<li>mods_str - Since the data contains three columns currently with information on variable paleography, it would help us to consolidate them into one column</li>
<li>combined - This column combines sign_form and mods_str into one string similar to how the <i>b</i> column is a combination of str_part and num_part</li>
<li>str_part and num_part - In order to determine which signs share a syllabic value, it will be useful to separate the transliterated readings into their string components and numerical components. Once we do this, we can group rows with the same str_part and count up the different usages of homophonous signs</li>
</ol>

In [100]:
file_names = df['file'].unique()
df['sign_form'] = df['b'].apply(lambda x: sign_index.get(x.lower(),'?'))
df['mods_str'] = df['a'] + '.' + df['f']  + '.' + df['m']

import re
def get_num_part(s):
    try:
        n = re.findall(r'[₀₁₂₃₄₅₆₇₈₉]+',s)[0]
        n = n.replace('₀','0').replace('₁','1').replace('₂','2').replace('₃','3').replace('₄','4')
        n = n.replace('₅','5').replace('₆','6').replace('₇','7').replace('₈','8').replace('₉','9')
    except:
        n = 1
    return n
def get_str_part(s):
    try:
        n = re.findall(r'[a-zA-ZšŠṣṢṭṬʾ \(\)0-9]+',s)[0]
    except:
        n = s
    return n
        
df['str_part'] = df['b'].apply(lambda x: get_str_part(x))
df['num_part'] = df['b'].apply(lambda x: get_num_part(x))
df['combined'] = df['sign_form'] + ':' + df['mods_str']
df

Unnamed: 0,a,b,break,cf,epos,f,file,form,frag,gw,...,sense,sign_loc_id,text_id,word_num,word_sign_tot,sign_form,mods_str,str_part,num_part,combined
0,,a,damaged,awātu,N,,P224485.json,a-bat,⸢a⸣-bat,word,...,word,P224485.2.1.0,P224485,1,2,A,..,a,1,A:..
1,,bat,,awātu,N,,P224485.json,a-bat,⸢a⸣-bat,word,...,word,P224485.2.1.1,P224485,1,2,BAD,..,bat,1,BAD:..
2,,LUGAL,,šarru,N,,P224485.json,LUGAL,LUGAL,king,...,king,P224485.2.2.0,P224485,2,1,LUGAL,..,LUGAL,1,LUGAL:..
3,,a,,ana,PRP,,P224485.json,a-na,a-na\t,to,...,to,P224485.2.3.0,P224485,3,2,A,..,a,1,A:..
4,,na,,ana,PRP,t,P224485.json,a-na,a-na\t,to,...,to,P224485.2.3.1,P224485,3,2,,.t.,na,1,NA:.t.
5,,1(diš),,Aššur-šarru-uṣur,PN,,P224485.json,{1}aš-šur-MAN-PAB,{1}aš-šur-MAN-⸢PAB,1,...,1,P224485.2.4.0,P224485,4,4,DIŠ,..,1(diš),1,DIŠ:..
6,,aš,,Aššur-šarru-uṣur,PN,,P224485.json,{1}aš-šur-MAN-PAB,{1}aš-šur-MAN-⸢PAB,1,...,1,P224485.2.4.1,P224485,4,4,AŠ,..,aš,1,AŠ:..
7,,šur,,Aššur-šarru-uṣur,PN,,P224485.json,{1}aš-šur-MAN-PAB,{1}aš-šur-MAN-⸢PAB,1,...,1,P224485.2.4.2,P224485,4,4,SUR,..,šur,1,SUR:..
8,,MAN,,Aššur-šarru-uṣur,PN,,P224485.json,{1}aš-šur-MAN-PAB,{1}aš-šur-MAN-⸢PAB,1,...,1,P224485.2.4.3,P224485,4,4,|U.U|,..,MAN,1,|U.U|:..
9,,PAB,damaged,Aššur-šarru-uṣur,PN,,P224485.json,{1}aš-šur-MAN-PAB,{1}aš-šur-MAN-⸢PAB,1,...,1,P224485.2.4.4,P224485,4,4,PAP,..,PAB,1,PAP:..


<h3>Paleography Setup</h3>
<p>The Data Frame we have contains the entire collection of signs in the corpus. However, not every sign has variants in paleography (at least according to Parpola's data input). We only want to look at the signs which have these variants, which we will term diagnostic. In the data, they are the signs that include any type of modification</p>

In [7]:
df2 = df[~(df['mods_str'] == '..')]
list_mod_signs = sorted(list(df2['sign_form'].unique()))

df_paleo = df[df['sign_form'].isin(list_mod_signs)]

#remove damaged signs too
df_paleo = df_paleo[df_paleo['break'] != 'damaged']
df_paleo

Unnamed: 0,a,b,break,cf,epos,f,file,form,frag,gw,...,ref,sense,sign_loc_id,text_id,word_sign_tot,sign_form,mods_str,str_part,num_part,combined
1,,bat,,awātu,N,,P224485.json,a-bat,⸢a⸣-bat,word,...,P224485.2.1,word,P224485.2.1.1,P224485,2,BAD,..,bat,1,BAD:..
2,,LUGAL,,šarru,N,,P224485.json,LUGAL,LUGAL,king,...,P224485.2.2,king,P224485.2.2.0,P224485,1,LUGAL,..,LUGAL,1,LUGAL:..
3,,a,,ana,PRP,,P224485.json,a-na,a-na\t,to,...,P224485.2.3,to,P224485.2.3.0,P224485,2,A,..,a,1,A:..
4,,na,,ana,PRP,t,P224485.json,a-na,a-na\t,to,...,P224485.2.3,to,P224485.2.3.1,P224485,2,,.t.,na,1,NA:.t.
7,,šur,,Aššur-šarru-uṣur,PN,,P224485.json,{1}aš-šur-MAN-PAB,{1}aš-šur-MAN-⸢PAB,1,...,P224485.2.4,1,P224485.2.4.2,P224485,4,SUR,..,šur,1,SUR:..
11,,mu,,šulmu,N,,P224485.json,šul-mu,šul⸣-mu,completeness,...,P224485.2.5,health,P224485.2.5.1,P224485,2,MU,..,mu,1,MU:..
12,,ia,,yâšim,IP,,P224485.json,ia-a-ši,ia-⸢a⸣-ši,to me,...,P224485.2.6,me,P224485.2.6.0,P224485,3,|I.A|,..,ia,1,|I.A|:..
14,,ši,,yâšim,IP,,P224485.json,ia-a-ši,ia-⸢a⸣-ši,to me,...,P224485.2.6,me,P224485.2.6.2,P224485,3,IGI,..,ši,1,IGI:..
16,,mu,,šulmu,N,,P224485.json,šul-mu,⸢šul⸣-mu,completeness,...,P224485.3.1,health,P224485.3.1.1,P224485,2,MU,..,mu,1,MU:..
17,,a,,ana,PRP,,P224485.json,a-na,a-na\t,to,...,P224485.3.2,to,P224485.3.2.0,P224485,2,A,..,a,1,A:..


Create the text matrix with raw counts

In [8]:
df_paleo_str = pd.DataFrame(df_paleo.groupby(['text_id']).apply(lambda x: ' '.join(x['combined'])))
df_paleo_str.columns = ['paleo_str']
df_paleo_str

cv = CountVectorizer(token_pattern='[^ ]+',lowercase=False)
ft = cv.fit_transform(list(df_paleo_str['paleo_str']))
tm_paleo = pd.DataFrame(ft.toarray(),columns=cv.get_feature_names(),index=df_paleo_str.index)
tm_paleo

Unnamed: 0_level_0,A:..,A:.d.,AB@g:..,AB@g:.m.,AB@g:.p.,AB₂:..,AB₂:.d.,AK:..,AK:.d.,AK:.dt.,...,ŠIM:..,ŠIM:.d.,ŠIM:.p.,ŠIM:.t.,ŠU:..,ŠU:.d.,ŠU:.m.,ŠU₂:..,ŠU₂:.d.,ŠU₂:.t.
text_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
P224485,104,0,1,0,0,1,0,1,0,0,...,3,0,0,2,4,0,0,28,0,0
P237089,5,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
P238649,5,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
P313416,2,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
P313417,39,0,0,0,0,0,0,0,0,0,...,0,0,0,0,8,0,0,0,0,0
P313419,16,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,2,0,0
P313420,13,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,2,0,0
P313421,6,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,1,0,0
P313422,10,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,3,0,0
P313425,47,0,0,0,0,0,0,0,0,0,...,0,0,0,0,3,0,1,0,0,0


<h3>Orthography Syllable Setup</h3>
<p>We now limit the original data frame in different way based on orthography. First we need to figure out which syllabic readings have multiple signs that can render them. We then eliminate capital letter entries because indices on logograms indicate different words and are not relevant here. Last, we limit the data frame to only these signs.

In [9]:
df2 = pd.DataFrame(df.groupby(['str_part'])['num_part'].agg('nunique'))
list_ortho_syls = list(df2[df2[('num_part')] > 1].index)

list_ortho_syls = [h for h in list_ortho_syls if len(re.findall(r'[A-Z]',h)) == 0]
list_ortho_syls

df_ortho_signs = df[df['str_part'].isin(list_ortho_syls)]
df_ortho_signs

Unnamed: 0,a,b,break,cf,epos,f,file,form,frag,gw,...,ref,sense,sign_loc_id,text_id,word_sign_tot,sign_form,mods_str,str_part,num_part,combined
0,,a,damaged,awātu,N,,P224485.json,a-bat,⸢a⸣-bat,word,...,P224485.2.1,word,P224485.2.1.0,P224485,2,A,..,a,1,A:..
3,,a,,ana,PRP,,P224485.json,a-na,a-na\t,to,...,P224485.2.3,to,P224485.2.3.0,P224485,2,A,..,a,1,A:..
6,,aš,,Aššur-šarru-uṣur,PN,,P224485.json,{1}aš-šur-MAN-PAB,{1}aš-šur-MAN-⸢PAB,1,...,P224485.2.4,1,P224485.2.4.1,P224485,4,AŠ,..,aš,1,AŠ:..
12,,ia,,yâšim,IP,,P224485.json,ia-a-ši,ia-⸢a⸣-ši,to me,...,P224485.2.6,me,P224485.2.6.0,P224485,3,|I.A|,..,ia,1,|I.A|:..
13,,a,damaged,yâšim,IP,,P224485.json,ia-a-ši,ia-⸢a⸣-ši,to me,...,P224485.2.6,me,P224485.2.6.1,P224485,3,A,..,a,1,A:..
17,,a,,ana,PRP,,P224485.json,a-na,a-na\t,to,...,P224485.3.2,to,P224485.3.2.0,P224485,2,A,..,a,1,A:..
20,,aš,,Mat-Aššur,GN,,P224485.json,KUR-aš-šur{KI},KUR-aš-šur{ki},Assyria,...,P224485.3.3,Assyria,P224485.3.3.1,P224485,4,AŠ,..,aš,1,AŠ:..
24,,ka,missing,libbu,N,,P224485.json,ŠA₃-ka,⸢ŠA₃⸣-[ka],interior,...,P224485.3.4,mood,P224485.3.4.1,P224485,2,KA,..,ka,1,KA:..
28,,ka,,ṭābu,AJ,,P224485.json,DUG₃.GA-ka,DUG₃.GA-ka,good,...,P224485.3.6,good,P224485.3.6.2,P224485,2,KA,..,ka,1,KA:..
29,,ša,damaged,ša,REL,,P224485.json,ša,⸢ša⸣,that,...,P224485.4.1,what,P224485.4.1.0,P224485,1,ŠA,..,ša,1,ŠA:..


Create the text matrix with raw counts

In [10]:
df_ortho_str = pd.DataFrame(df_ortho_signs.groupby(['text_id']).apply(lambda x: ' '.join(x['b'])))
df_ortho_str.columns = ['ortho_str']
df_ortho_str

cv = CountVectorizer(token_pattern='[^ ]+',lowercase=False)
ft = cv.fit_transform(list(df_ortho_str['ortho_str']))
tm_ortho_sign = pd.DataFrame(ft.toarray(),columns=cv.get_feature_names(),index=df_ortho_str.index)
tm_ortho_sign

Unnamed: 0_level_0,a,ana,ana₃,ar,ar₂,aš,aš₂,a₂,be,be₂,...,ša₂,šu,šum,šum₂,šu₂,ṭe,ṭe₂,ṭe₃,ṭi,ṭi₂
text_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
P224485,103,2,0,6,0,5,8,0,7,2,...,5,4,1,0,33,0,0,1,0,0
P237089,5,0,0,0,0,0,0,0,2,0,...,0,1,0,0,0,0,0,0,0,0
P238649,5,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,2,0,0
P313416,3,0,0,0,0,0,0,0,3,0,...,0,0,0,0,0,0,0,0,0,0
P313417,36,0,0,0,0,1,2,0,0,0,...,0,8,0,0,0,1,0,1,1,0
P313419,16,0,0,0,0,0,0,0,0,0,...,1,0,0,0,2,0,0,1,0,1
P313420,14,1,0,1,0,1,0,0,6,0,...,0,0,0,0,3,0,0,1,0,0
P313421,7,0,0,0,0,1,0,0,1,0,...,0,0,0,0,1,0,0,0,0,0
P313422,11,1,0,2,0,1,1,0,1,1,...,0,0,0,0,4,0,0,0,0,0
P313425,51,0,0,5,0,0,1,0,5,0,...,3,3,1,1,0,0,0,0,0,0


<h3>Orthography Word Setup</h3>
<p>The setup for this section is slightly different. Instead of counting up every sign or reading, we count up every word, its normalization and its form. To help us out we will create two new columns <i>lemma_norm</i> and <i>lemma_norm_form</i>. The first combines the lemma and the normalized form. The latter attaches the transliteration to the former. We only want to include lemma/norm combinations that have multiple forms associated with them.

In [397]:
df_words = pd.DataFrame(all_words)
df_words = df_words[(df_words['cf'] != 'no-cf')]

df_words['lemma'] = df_words['cf'] + '[' + df_words['gw'] + ']' + df_words['pos']
df_words['lemma_norm'] = df_words['lemma'] + ':' + df_words['norm']
df_words['lemma_norm_form'] = df_words['lemma_norm'] + ':' + df_words['form']

df_norm_uniq = pd.DataFrame(df_words.groupby('lemma_norm')['form'].nunique())
list_ortho_words = list(df_norm_uniq[df_norm_uniq[('form')] > 1].index)

df_form_50 = pd.DataFrame(df_words.groupby('lemma_norm_form')['form'].agg('count'))
list_form_50 = list(df_form_50[df_form_50[('form')] > 50].index)

df_ortho_words = df_words[(df_words['lemma_norm'].isin(list_ortho_words)) & df_words['lemma_norm_form'].isin(list_form_50)]
df_ortho_words

Unnamed: 0,cf,epos,file,form,frag,gw,line_label,norm,pos,ref,sense,text_id,word_num,word_sign_tot,lemma,lemma_norm,lemma_norm_form
1,šarru,N,P224485.json,LUGAL,LUGAL,king,o 1,šarri,N,P224485.2.2,king,P224485,2,1,šarru[king]N,šarru[king]N:šarri,šarru[king]N:šarri:LUGAL
2,ana,PRP,P224485.json,a-na,a-na\t,to,o 1,ana,PRP,P224485.2.3,to,P224485,3,2,ana[to]PRP,ana[to]PRP:ana,ana[to]PRP:ana:a-na
4,šulmu,N,P224485.json,šul-mu,šul⸣-mu,completeness,o 1,šulmu,N,P224485.2.5,health,P224485,5,2,šulmu[completeness]N,šulmu[completeness]N:šulmu,šulmu[completeness]N:šulmu:šul-mu
6,šulmu,N,P224485.json,šul-mu,⸢šul⸣-mu,completeness,o 2,šulmu,N,P224485.3.1,health,P224485,7,2,šulmu[completeness]N,šulmu[completeness]N:šulmu,šulmu[completeness]N:šulmu:šul-mu
7,ana,PRP,P224485.json,a-na,a-na\t,to,o 2,ana,PRP,P224485.3.2,to,P224485,8,2,ana[to]PRP,ana[to]PRP:ana,ana[to]PRP:ana:a-na
10,lū,MOD,P224485.json,lu,⸢lu⸣,may,o 2,lū,MOD,P224485.3.5,may,P224485,11,1,lū[may]MOD,lū[may]MOD:lū,lū[may]MOD:lū:lu
12,ša,REL,P224485.json,ša,⸢ša⸣,that,o 3,ša,REL,P224485.4.1,what,P224485,13,1,ša[that]REL,ša[that]REL:ša,ša[that]REL:ša:ša
14,mā,PRP,P224485.json,ma-a,ma-a,saying,o 3,mā,PRP,P224485.4.3,saying,P224485,15,2,mā[saying]PRP,mā[saying]PRP:mā,mā[saying]PRP:mā:ma-a
16,ša,DET,P224485.json,ša,[ša],of,o 3,ša,DET,P224485.4.5,of,P224485,17,1,ša[of]DET,ša[of]DET:ša,ša[of]DET:ša:ša
19,ina,PRP,P224485.json,ina,ina,in,o 4,ina,PRP,P224485.5.2,in,P224485,20,1,ina[in]PRP,ina[in]PRP:ina,ina[in]PRP:ina:ina


Create the text matrix with raw counts

In [12]:
df_ortho_wordstr = pd.DataFrame(df_ortho_words.groupby(['text_id']).apply(lambda x: ' '.join(x['lemma_norm_form'])))
df_ortho_wordstr.columns = ['ortho_wordstr']
df_ortho_str

cv = CountVectorizer(token_pattern='[^ ]+',lowercase=False)
ft = cv.fit_transform(list(df_ortho_wordstr['ortho_wordstr']))
tm_ortho_word = pd.DataFrame(ft.toarray(),columns=cv.get_feature_names(),index=df_ortho_wordstr.index)
tm_ortho_word.to_csv('output/tm_ortho_word.csv',encoding='utf-8',sep='\t')
tm_ortho_word

Unnamed: 0_level_0,Urarṭaya[Urarṭian]EN:Urarṭaya:{KUR}URI-a.a,adanniš[very,adi[until]PRP:adi:a-di,akī[as]PRP:akī:a-ki,alāku[go]V:ittalka:it-tal-ka,ammar[as,ana[to]PRP:ana:a-na,annûri[now]AV:annurig:an-nu-rig,anāku[I]IP:anāku:a-na-ku,ardu[slave]N:urdaka:ARAD-ka,...,šumma[if]MOD:šumma:šum₂-ma,šumma[if]MOD:šummu:šum₂-mu,šunu[they]IP:šunu:šu-nu,šū[he]IP:šû:šu-u,šū[he]IP:šû:šu-u₂,ūma[today]AV:ūmâ:u₂-ma-a,ṣābu[people]N:ṣābāni:ERIM-MEŠ,ṣābu[people]N:ṣābāni:{LU₂}ERIM-MEŠ,ṭābu[good]AJ:ṭāb:DUG₃.GA,ṭēmu[(fore)thought]N:ṭēmu:ṭe₃-e-mu
text_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
P224485,0,2,0,0,2,1,11,4,0,0,...,0,0,0,0,1,3,3,0,0,0
P237089,0,0,0,0,0,0,2,0,0,1,...,0,0,0,0,0,0,0,0,1,0
P238649,0,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,1
P313416,0,0,0,0,0,0,3,0,0,1,...,0,0,0,0,0,0,0,0,0,0
P313417,0,0,0,0,0,1,7,0,0,1,...,0,0,0,0,0,2,1,0,0,1
P313419,0,0,0,0,0,0,5,1,0,1,...,0,0,0,0,0,0,0,0,0,1
P313420,0,0,1,0,0,0,3,0,0,1,...,0,0,0,0,0,0,1,0,0,1
P313421,0,0,0,0,0,0,2,0,0,1,...,0,0,0,0,0,0,0,0,0,0
P313422,0,0,1,0,0,0,1,0,0,1,...,0,0,0,0,0,0,0,0,0,0
P313425,0,1,0,0,0,0,10,0,3,1,...,1,0,1,0,1,2,0,0,0,0


Put them all together

In [13]:
tm_counts = pd.concat([tm_paleo,tm_ortho_sign,tm_ortho_word],axis=1)

<h3>Selecting the Features</h3>

<p>Three criteria are employed for selecting the initial list of features for clustering</p>
<ol>
    <li>Variations that have the highest frequency throughout the corpus</li>
    <li>Variations that are used in a mixed distribution rather than a complementary distribution. This criterion is only for the paleography and syllable variation, whose context is examined through the words they appear in. The form variations themselves thus cannot be examined in this context.</li>
    <li>Variations that look "good" based on an examination of the feature count matrices and their distrubtion within certain groups to be defined later</li>
</ol>
    Obviously, we cannot look at (3) right now but we can look at each of the first two for the three feature types.

<p>1.1 Paleography Frequencies</p>

In [14]:
df_paleo_count = pd.DataFrame(df_paleo.groupby(['sign_form','combined'])['cf'].agg('count'))
df_paleo_count = df_paleo_count[df_paleo_count[('cf')] > 100]

df_paleo_uniq = df_paleo_count.groupby([('sign_form')]).nunique()

df_paleo_count = df_paleo_count.loc[list(df_paleo_uniq[df_paleo_uniq[('cf')] > 1].index)]
df_paleo_count

Unnamed: 0_level_0,Unnamed: 1_level_0,cf
sign_form,combined,Unnamed: 2_level_1
BA,BA:..,401
BA,BA:.t.,120
BU,BU:..,960
BU,BU:.p.,109
DA,DA:..,434
DA,DA:.d.,201
DI,DI:..,1024
DI,DI:.d.,156
LI,LI:..,465
LI,LI:.d.,147


<p>1.2 Syllable Frequencies</p>

In [15]:
df_ortho_sign_count = pd.DataFrame(df_ortho_signs.groupby(['str_part','b'])['cf'].agg('count'))
df_ortho_sign_count = df_ortho_sign_count[df_ortho_sign_count[('cf')] > 100]

df_ortho_sign_uniq = df_ortho_sign_count.groupby([('str_part')]).nunique()

df_ortho_sign_count = df_ortho_sign_count.loc[list(df_ortho_sign_uniq[df_ortho_sign_uniq[('cf')] > 1].index)]
df_ortho_sign_count

Unnamed: 0_level_0,Unnamed: 1_level_0,cf
str_part,b,Unnamed: 2_level_1
aš,aš,392
aš,aš₂,427
ia,ia,1724
ia,ia₂,424
li,li,701
li,li₂,1191
tu,tu,440
tu,tu₂,533
u,u,1503
u,u₂,1843


<p>1.3 Word Form Frequencies</p>

In [16]:
df_ortho_word_count = pd.DataFrame(df_words.groupby(['lemma_norm','lemma_norm_form'])['cf'].agg('count'))
df_ortho_word_count = df_ortho_word_count[df_ortho_word_count[('cf')] > 50]

df_ortho_word_uniq = df_ortho_word_count.groupby([('lemma_norm')]).nunique()

df_ortho_word_count = df_ortho_word_count.loc[list(df_ortho_word_uniq[df_ortho_word_uniq[('cf')] > 1].index)]
df_ortho_word_count

Unnamed: 0_level_0,Unnamed: 1_level_0,cf
lemma_norm,lemma_norm_form,Unnamed: 2_level_1
bēlu[lord]N:bēlī,bēlu[lord]N:bēlī:EN,182
bēlu[lord]N:bēlī,bēlu[lord]N:bēlī:be-li₂,642
bēlu[lord]N:bēlīya,bēlu[lord]N:bēlīya:EN-a,76
bēlu[lord]N:bēlīya,bēlu[lord]N:bēlīya:EN-ia,770
bēlu[lord]N:bēlīya,bēlu[lord]N:bēlīya:EN-ia₂,190
bēlu[lord]N:bēlīya,bēlu[lord]N:bēlīya:be-li₂-ia,309
bēlu[lord]N:bēlīya,bēlu[lord]N:bēlīya:be-li₂-ia₂,194
ištu[from]PRP:issu,ištu[from]PRP:issu:TA,113
ištu[from]PRP:issu,ištu[from]PRP:issu:TA@v,346
libbu[interior]N:libbi,libbu[interior]N:libbi:ŠA₃,217


<p>2.1 Paleography Distrubtion within words

Now let's try to apply a quantitative method to figure out the level of mixed distribution which paleographic variants bear within word forms. The steps here are:
<ol>
    <li>Select only the particular sign forms and modifications that appear a sufficient number of times within the same forms</li>
    <li>Count the number of times these sign forms and modiciations occur in each text</li>
    <li>Create a text matrix which contains the appropriate distribution for the modifications within each sign form, adding 1 to each cell to avoid divide by zero issues</li>
</ol>

In [68]:
df_mods_agg = pd.DataFrame(df_paleo.groupby(['sign_form','form','mods_str'])['a'].agg('count')).reset_index()
df_mods_agg.columns = ['sign_form','form','mods_str','count']
#first let's remove where total instances are less than a certain arbitrary value, say 5
df_mods_agg = df_mods_agg[df_mods_agg['count'] >= 5]
#NOW find and only keep the rows where sign_form and form are duplicates
df_mods_agg['is_dup'] = df_mods_agg.duplicated(['sign_form','form'],False)
df_mods_agg = df_mods_agg[df_mods_agg['is_dup'] == True]
df_mods_agg.to_csv('output/dist_paleo.csv',encoding='utf-8')
df_mods_agg

Unnamed: 0,sign_form,form,mods_str,count,is_dup
1894,ANŠE,ANŠE,..,26,True
1895,ANŠE,ANŠE,.d.,7,True
1919,ANŠE,{ANŠE}KUR.RA-MEŠ,..,32,True
1920,ANŠE,{ANŠE}KUR.RA-MEŠ,.d.,13,True
1988,ARAD,ARAD-ka,..,426,True
1990,ARAD,ARAD-ka,.p.,17,True
2007,ARAD,{LU₂}ARAD-MEŠ,..,27,True
2008,ARAD,{LU₂}ARAD-MEŠ,.p.,5,True
2273,BA,e-tar-ba,..,14,True
2274,BA,e-tar-ba,.t.,5,True


In [61]:
df_select_signmods = df_mods_agg[['sign_form','mods_str']].drop_duplicates()
df_select_signmods['combined'] = df_select_signmods['sign_form'] + ':' + df_select_signmods['mods_str']
df_select_signmods

Unnamed: 0,sign_form,mods_str,combined
1894,ANŠE,..,ANŠE:..
1895,ANŠE,.d.,ANŠE:.d.
1988,ARAD,..,ARAD:..
1990,ARAD,.p.,ARAD:.p.
2273,BA,..,BA:..
2274,BA,.t.,BA:.t.
3163,BU,..,BU:..
3164,BU,.p.,BU:.p.
3165,BU,.t.,BU:.t.
3933,DA,..,DA:..


<p>2.2 Same for Syllable Orthography</p>

In [27]:
df_ortho_signs['form_str_part'] = df_ortho_signs['form'].apply(lambda x: re.sub(r'[₁₂₃₄₅₆₇₈₉₀]','',x))
df_ortho_signs

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


Unnamed: 0,a,b,break,cf,epos,f,file,form,frag,gw,...,sense,sign_loc_id,text_id,word_sign_tot,sign_form,mods_str,str_part,num_part,combined,form_str_part
0,,a,damaged,awātu,N,,P224485.json,a-bat,⸢a⸣-bat,word,...,word,P224485.2.1.0,P224485,2,A,..,a,1,A:..,a-bat
3,,a,,ana,PRP,,P224485.json,a-na,a-na\t,to,...,to,P224485.2.3.0,P224485,2,A,..,a,1,A:..,a-na
6,,aš,,Aššur-šarru-uṣur,PN,,P224485.json,{1}aš-šur-MAN-PAB,{1}aš-šur-MAN-⸢PAB,1,...,1,P224485.2.4.1,P224485,4,AŠ,..,aš,1,AŠ:..,{1}aš-šur-MAN-PAB
12,,ia,,yâšim,IP,,P224485.json,ia-a-ši,ia-⸢a⸣-ši,to me,...,me,P224485.2.6.0,P224485,3,|I.A|,..,ia,1,|I.A|:..,ia-a-ši
13,,a,damaged,yâšim,IP,,P224485.json,ia-a-ši,ia-⸢a⸣-ši,to me,...,me,P224485.2.6.1,P224485,3,A,..,a,1,A:..,ia-a-ši
17,,a,,ana,PRP,,P224485.json,a-na,a-na\t,to,...,to,P224485.3.2.0,P224485,2,A,..,a,1,A:..,a-na
20,,aš,,Mat-Aššur,GN,,P224485.json,KUR-aš-šur{KI},KUR-aš-šur{ki},Assyria,...,Assyria,P224485.3.3.1,P224485,4,AŠ,..,aš,1,AŠ:..,KUR-aš-šur{KI}
24,,ka,missing,libbu,N,,P224485.json,ŠA₃-ka,⸢ŠA₃⸣-[ka],interior,...,mood,P224485.3.4.1,P224485,2,KA,..,ka,1,KA:..,ŠA-ka
28,,ka,,ṭābu,AJ,,P224485.json,DUG₃.GA-ka,DUG₃.GA-ka,good,...,good,P224485.3.6.2,P224485,2,KA,..,ka,1,KA:..,DUG.GA-ka
29,,ša,damaged,ša,REL,,P224485.json,ša,⸢ša⸣,that,...,what,P224485.4.1.0,P224485,1,ŠA,..,ša,1,ŠA:..,ša


In [70]:
df_syls_agg = pd.DataFrame(df_ortho_signs.groupby(['str_part','form_str_part','b'])['a'].agg('count')).reset_index()
df_syls_agg.columns = ['str_part','form_str_part','b','count']
#first let's remove where total instances are less than a certain arbitrary value, say 5
df_syls_agg = df_syls_agg[df_syls_agg['count'] >= 3]
#NOW find and only keep the rows where sign_form and form are duplicates
df_syls_agg['is_dup'] = df_syls_agg.duplicated(['str_part','form_str_part'],False)
df_syls_agg = df_syls_agg[df_syls_agg['is_dup'] == True]
df_syls_agg.to_csv('output/dist_ortho_sign.csv',encoding='utf-8')
df_syls_agg

Unnamed: 0,str_part,form_str_part,b,count,is_dup
1349,ar,ar-hiš,ar,3,True
1350,ar,ar-hiš,ar₂,28,True
2362,dul,dul-li,dul,20,True
2363,dul,dul-li,dul₆,4,True
2369,dul,dul-lu,dul,70,True
2370,dul,dul-lu,dul₆,31,True
2394,gir,e-gir-te,gir,3,True
2395,gir,e-gir-te,gir₂,8,True
2402,gir,e-gir-tu,gir,3,True
2403,gir,e-gir-tu,gir₂,36,True


In [63]:
df_select_bs = df_syls_agg[['str_part','b']].drop_duplicates()
#Don't need to create combined column here because b is sufficient
#df_select_signmods['combined'] = df_select_signmods['sign_form'] + ':' + df_select_signmods['mods_str']
df_select_bs

Unnamed: 0,str_part,b
1349,ar,ar
1350,ar,ar₂
2362,dul,dul
2363,dul,dul₆
2394,gir,gir
2395,gir,gir₂
2418,gur,gur
2419,gur,gur₂
3248,ia,ia
3249,ia,ia₂


In [30]:
select_syls = list(df_syls_agg['str_part'].unique())
select_syls

['dul', 'gur', 'ia', 'li', 'mi', 'tu', 'u', 'ša', 'šu', 'šum', 'ṭi']

In [31]:
df_file_select_bs = df_ortho_signs[df_ortho_signs['str_part'].isin(select_syls)]
df_file_select_bs = pd.DataFrame(df_file_select_bs.groupby(['file','str_part','b'])['a'].agg('count')).reset_index()
#Again combined is just b
#df_file_select_syls['combined'] = df_file_select_signs['sign_form'] + ':' + df_file_select_signs['mods_str']
df_file_select_bs

Unnamed: 0,file,str_part,b,a
0,P224485.json,dul,dul,2
1,P224485.json,ia,ia,16
2,P224485.json,li,li,6
3,P224485.json,li,li₂,7
4,P224485.json,mi,mi,7
5,P224485.json,mi,mi₃,1
6,P224485.json,tu,tu,10
7,P224485.json,tu,tu₂,6
8,P224485.json,u,u,9
9,P224485.json,u,u₂,19


<p>3. Defining our selection</p>

In [66]:
#Paleography
map_paleo = {'BA':['BA:..','BA:.t.'],
            'BU':['BU:..','BU:.p.'],
            'DA':['DA:..','DA:.d.'],
             'DI':['DI:..','DI:.d.'],
             'LI':['LI:..','LI:.d.'],
             'LU₂':['LU₂:v..','LU₂:v.y.'],
             'NA':['NA:..','NA:.t.'],
             'NI':['NI:..','NI:.d.'],
            'SU':['SU:..','SU:.t.'],
            'TI':['TI:..','TI:.t.'],
            'U₂':['U₂:..','U₂:.m.'],
             #'RU':['RU:..','RU:.d.'],
             '|ME.U.U.U|':['|ME.U.U.U|:..','|ME.U.U.U|:.m.'],
             'ŠA': ['ŠA:..','ŠA:.dm.']
            }
list_paleo = []
list_paleo_pairs = []
for v in map_paleo.values():
    list_paleo = list_paleo + v
    list_paleo_pairs.append(v)
list_paleo

#Orthography - Sign
map_ortho_sign = {'ia':['ia','ia₂'],
                  'li':['li','li₂'],
                  'ša':['ša','ša₂'],
                  'šu':['šu','šu₂'],
                  'tu':['tu','tu₂'],
                  'u':['u','u₂']
                 }
list_ortho_sign = []
list_ortho_sign_pairs = []
for v in map_ortho_sign.values():
    list_ortho_sign = list_ortho_sign + v
    list_ortho_sign_pairs.append(v)
list_ortho_sign

#Orthography - Word
map_ortho_word = {'bēlu[lord]N:bēlī':['bēlu[lord]N:bēlī:be-li₂','bēlu[lord]N:bēlī:EN'],
                  'bēlu[lord]N:bēlīya':['bēlu[lord]N:bēlīya:EN-ia','bēlu[lord]N:bēlīya:be-li₂-ia'],
                  'libbu[interior]N:libbi':['libbu[interior]N:libbi:ŠA₃','libbu[interior]N:libbi:ŠA₃-bi'],
                  'lā[not]MOD:lā':['lā[not]MOD:lā:la','lā[not]MOD:lā:la-a'],
                  'lū[may]MOD:lū':['lū[may]MOD:lū:lu','lū[may]MOD:lū:lu-u'],
                  'mā[saying]PRP:mā':['mā[saying]PRP:mā:ma','mā[saying]PRP:mā:ma-a'],
                  'muhhu[skull]N:muhhi':['muhhu[skull]N:muhhi:UGU','muhhu[skull]N:muhhi:UGU-hi'],
                  'pānu[front]N:pān':['pānu[front]N:pān:IGI','pānu[front]N:pān:pa-an'],
                  'šulmu[completeness]N:šulmu':['šulmu[completeness]N:šulmu:DI-mu','šulmu[completeness]N:šulmu:šul-mu']
                }
list_ortho_word = []
list_ortho_word_pairs = []
for v in map_ortho_word.values():
    list_ortho_word = list_ortho_word + v
    list_ortho_word_pairs.append(v)
list_ortho_word_pairs
list_features_all = list_paleo + list_ortho_sign + list_ortho_word
list_pairs_all = list_paleo_pairs + list_ortho_sign_pairs + list_ortho_word_pairs

Save the counts matrices with only these features

In [80]:
tm_counts_class = pd.concat([tm_counts[list_features_all],df_class],axis=1)
tm_counts_class.to_csv('output/tm_counts.csv',encoding='utf-8')

tm_counts_class_senderloc = pd.DataFrame(tm_counts_class.groupby('senderloc').agg('sum'))[list_features_all]
tm_counts_class_senderloc.to_csv('output/tm_counts_senderloc.csv',encoding='utf-8')

tm_counts_class_dossier = pd.DataFrame(tm_counts_class.groupby('dossier_nocertain').agg('sum'))[list_features_all]
tm_counts_class_dossier.to_csv('output/tm_counts_dossier.csv',encoding='utf-8')

tm_counts_class_dossier = pd.DataFrame(tm_counts_class.groupby('dossier').agg('sum'))[list_features_all]
tm_counts_class_dossier.to_csv('output/tm_counts_dossier_abc.csv',encoding='utf-8')

tm_counts_class_saachap = pd.DataFrame(tm_counts_class.groupby('saa_chap').agg('sum'))[list_features_all]
tm_counts_class_saachap.to_csv('output/tm_counts_saachap.csv',encoding='utf-8')

Put them all together

In [33]:
map_all = map_paleo.copy()
map_all.update(map_ortho_sign)
map_all.update(map_ortho_word)
map_all

{'NA': ['NA:..', 'NA:.t.'],
 'NI': ['NI:..', 'NI:.d.'],
 'bēlu[lord]N:bēlīya': ['bēlu[lord]N:bēlīya:EN-ia',
  'bēlu[lord]N:bēlīya:be-li₂-ia'],
 'ia': ['ia', 'ia₂'],
 'lā[not]MOD:lā': ['lā[not]MOD:lā:la', 'lā[not]MOD:lā:la-a'],
 'lū[may]MOD:lū': ['lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u'],
 'u': ['u', 'u₂'],
 '|ME.U.U.U|': ['|ME.U.U.U|:..', '|ME.U.U.U|:.m.'],
 'ša': ['ša', 'ša₂'],
 'šu': ['šu', 'šu₂']}

Form the Power Set

In [82]:
def p(s):
    powerset = []
    for i in range(2**len(s)):
        subset = [x for j,x in enumerate(s) if (i >> j) & 1]
        powerset.append(subset)
    return powerset

In [83]:
import itertools
list_power = p(list_pairs_all)
list_power = [list(itertools.chain.from_iterable(x)) for x in list_power if x != []]
len(list_power)

KeyboardInterrupt: 

Form the distribution matrix

In [84]:
df_form_count = pd.DataFrame(df_words[df_words['lemma_norm_form'].isin(list_ortho_word)].groupby(['lemma_norm','lemma_norm_form'])['cf'].agg('count'))
df_form_count

dict_form_count = {}
for ln in map_ortho_word:
    count1 = float(df_form_count.loc[(ln,map_ortho_word[ln][0])]['cf'])
    count2 = float(df_form_count.loc[(ln,map_ortho_word[ln][1])]['cf'])
    tot12 = count1 + count2
    dict_form_count[map_ortho_word[ln][0]] = count1 / tot12
    dict_form_count[map_ortho_word[ln][1]] = count2 / tot12
dict_form_count

df_paleo_count = pd.DataFrame(df[df['combined'].isin(list_paleo)].groupby(['sign_form','combined'])['cf'].agg('count'))
df_paleo_count

dict_paleo_count = {}
for ln in map_paleo:
    count1 = float(df_paleo_count.loc[(ln,map_paleo[ln][0])]['cf'])
    count2 = float(df_paleo_count.loc[(ln,map_paleo[ln][1])]['cf'])
    tot12 = count1 + count2
    dict_paleo_count[map_paleo[ln][0]] = count1 / tot12
    dict_paleo_count[map_paleo[ln][1]] = count2 / tot12
dict_paleo_count

df_sign_count = pd.DataFrame(df[df['b'].isin(list_ortho_sign)].groupby(['str_part','b'])['cf'].agg('count'))
df_sign_count

dict_sign_count = {}
for ln in map_ortho_sign:
    count1 = float(df_sign_count.loc[(ln,map_ortho_sign[ln][0])]['cf'])
    count2 = float(df_sign_count.loc[(ln,map_ortho_sign[ln][1])]['cf'])
    tot12 = count1 + count2
    dict_sign_count[map_ortho_sign[ln][0]] = count1 / tot12
    dict_sign_count[map_ortho_sign[ln][1]] = count2 / tot12
dict_sign_count

{'ia': 0.8026070763500931,
 'ia₂': 0.1973929236499069,
 'li': 0.370507399577167,
 'li₂': 0.629492600422833,
 'tu': 0.4522096608427544,
 'tu₂': 0.5477903391572456,
 'u': 0.4491930663478781,
 'u₂': 0.550806933652122,
 'ša': 0.8655848132271893,
 'ša₂': 0.13441518677281078,
 'šu': 0.30218446601941745,
 'šu₂': 0.6978155339805825}

In [85]:
dict_count_all = dict_paleo_count.copy()
dict_count_all.update(dict_sign_count)
dict_count_all.update(dict_form_count)
dict_count_all

{'BA:..': 0.7768166089965398,
 'BA:.t.': 0.2231833910034602,
 'BU:..': 0.9074852817493693,
 'BU:.p.': 0.09251471825063078,
 'DA:..': 0.7073825503355705,
 'DA:.d.': 0.29261744966442954,
 'DI:..': 0.8773946360153256,
 'DI:.d.': 0.12260536398467432,
 'LI:..': 0.7777777777777778,
 'LI:.d.': 0.2222222222222222,
 'LU₂:v..': 0.7032608695652174,
 'LU₂:v.y.': 0.2967391304347826,
 'NA:..': 0.8526540573520439,
 'NA:.t.': 0.14734594264795606,
 'NI:..': 0.9462425595238095,
 'NI:.d.': 0.05375744047619048,
 'SU:..': 0.6962962962962963,
 'SU:.t.': 0.3037037037037037,
 'TI:..': 0.42098765432098767,
 'TI:.t.': 0.5790123456790124,
 'U₂:..': 0.8165038002171553,
 'U₂:.m.': 0.18349619978284473,
 'bēlu[lord]N:bēlī:EN': 0.220873786407767,
 'bēlu[lord]N:bēlī:be-li₂': 0.779126213592233,
 'bēlu[lord]N:bēlīya:EN-ia': 0.7136237256719185,
 'bēlu[lord]N:bēlīya:be-li₂-ia': 0.28637627432808155,
 'ia': 0.8026070763500931,
 'ia₂': 0.1973929236499069,
 'li': 0.370507399577167,
 'libbu[interior]N:libbi:ŠA₃': 0.43927125506

Form the Distribution Matrix

In [40]:
def form_dist_matrix(tm_count,mapping):
    d = {}
    vecs = {}
    
    listing = []
    for v in mapping.values():
        listing = listing + v
        
    tm_count = tm_count[listing]
    for i, row in tm_count.iterrows():
        d[i] = {}
        for key in mapping:
            key_sum = np.sum(tm_count.loc[i][mapping[key]])
            for v in mapping[key]:
                if key_sum > 0:
                    d[i][v] = tm_count.loc[i][v] / key_sum
                else:
                    d[i][v] = np.nan

    tm_dist = pd.DataFrame(d).transpose()
    return tm_dist

In [41]:
tm_dist_all = form_dist_matrix(tm_counts,map_all)
tm_dist_all

Unnamed: 0,NA:..,NA:.t.,NI:..,NI:.d.,bēlu[lord]N:bēlīya:EN-ia,bēlu[lord]N:bēlīya:be-li₂-ia,ia,ia₂,lā[not]MOD:lā:la,lā[not]MOD:lā:la-a,lū[may]MOD:lū:lu,lū[may]MOD:lū:lu-u,u,u₂,|ME.U.U.U|:..,|ME.U.U.U|:.m.,ša,ša₂,šu,šu₂
P224485,0.526316,0.473684,0.977273,0.022727,0.000000,1.000000,1.000000,0.000000,1.000000,0.000000,0.875000,0.125000,0.321429,0.678571,0.315789,0.684211,0.807692,0.192308,0.108108,0.891892
P237089,1.000000,0.000000,1.000000,0.000000,,,0.000000,1.000000,,,0.500000,0.500000,1.000000,0.000000,,,1.000000,0.000000,1.000000,0.000000
P238649,1.000000,0.000000,0.600000,0.400000,,,,,,,,,0.500000,0.500000,,,1.000000,0.000000,,
P313416,1.000000,0.000000,0.500000,0.500000,0.000000,1.000000,0.333333,0.666667,,,0.000000,1.000000,0.333333,0.666667,0.000000,1.000000,,,,
P313417,0.357143,0.642857,1.000000,0.000000,1.000000,0.000000,1.000000,0.000000,0.000000,1.000000,1.000000,0.000000,0.000000,1.000000,1.000000,0.000000,1.000000,0.000000,1.000000,0.000000
P313419,0.833333,0.166667,0.800000,0.200000,1.000000,0.000000,1.000000,0.000000,0.666667,0.333333,1.000000,0.000000,0.666667,0.333333,,,0.750000,0.250000,0.000000,1.000000
P313420,1.000000,0.000000,1.000000,0.000000,,,0.375000,0.625000,1.000000,0.000000,0.500000,0.500000,1.000000,0.000000,0.000000,1.000000,1.000000,0.000000,0.000000,1.000000
P313421,1.000000,0.000000,1.000000,0.000000,0.000000,1.000000,0.500000,0.500000,,,0.000000,1.000000,0.333333,0.666667,0.000000,1.000000,1.000000,0.000000,0.000000,1.000000
P313422,0.000000,1.000000,1.000000,0.000000,1.000000,0.000000,1.000000,0.000000,1.000000,0.000000,0.000000,1.000000,0.428571,0.571429,,,1.000000,0.000000,0.000000,1.000000
P313425,0.769231,0.230769,1.000000,0.000000,1.000000,0.000000,1.000000,0.000000,0.800000,0.200000,1.000000,0.000000,0.473684,0.526316,0.250000,0.750000,0.823529,0.176471,1.000000,0.000000


Fill in empty features with their distribution over the entire corpus

In [42]:
for lnf in dict_count_all:
    #print(dict_form_count[lnf])
    tm_dist_all[lnf] = tm_dist_all[lnf].fillna(dict_count_all[lnf])
tm_dist_all

Unnamed: 0,NA:..,NA:.t.,NI:..,NI:.d.,bēlu[lord]N:bēlīya:EN-ia,bēlu[lord]N:bēlīya:be-li₂-ia,ia,ia₂,lā[not]MOD:lā:la,lā[not]MOD:lā:la-a,lū[may]MOD:lū:lu,lū[may]MOD:lū:lu-u,u,u₂,|ME.U.U.U|:..,|ME.U.U.U|:.m.,ša,ša₂,šu,šu₂
P224485,0.526316,0.473684,0.977273,0.022727,0.000000,1.000000,1.000000,0.000000,1.000000,0.000000,0.875000,0.125000,0.321429,0.678571,0.315789,0.684211,0.807692,0.192308,0.108108,0.891892
P237089,1.000000,0.000000,1.000000,0.000000,0.713624,0.286376,0.000000,1.000000,0.841892,0.158108,0.500000,0.500000,1.000000,0.000000,0.724797,0.275203,1.000000,0.000000,1.000000,0.000000
P238649,1.000000,0.000000,0.600000,0.400000,0.713624,0.286376,0.802607,0.197393,0.841892,0.158108,0.730878,0.269122,0.500000,0.500000,0.724797,0.275203,1.000000,0.000000,0.302184,0.697816
P313416,1.000000,0.000000,0.500000,0.500000,0.000000,1.000000,0.333333,0.666667,0.841892,0.158108,0.000000,1.000000,0.333333,0.666667,0.000000,1.000000,0.865585,0.134415,0.302184,0.697816
P313417,0.357143,0.642857,1.000000,0.000000,1.000000,0.000000,1.000000,0.000000,0.000000,1.000000,1.000000,0.000000,0.000000,1.000000,1.000000,0.000000,1.000000,0.000000,1.000000,0.000000
P313419,0.833333,0.166667,0.800000,0.200000,1.000000,0.000000,1.000000,0.000000,0.666667,0.333333,1.000000,0.000000,0.666667,0.333333,0.724797,0.275203,0.750000,0.250000,0.000000,1.000000
P313420,1.000000,0.000000,1.000000,0.000000,0.713624,0.286376,0.375000,0.625000,1.000000,0.000000,0.500000,0.500000,1.000000,0.000000,0.000000,1.000000,1.000000,0.000000,0.000000,1.000000
P313421,1.000000,0.000000,1.000000,0.000000,0.000000,1.000000,0.500000,0.500000,0.841892,0.158108,0.000000,1.000000,0.333333,0.666667,0.000000,1.000000,1.000000,0.000000,0.000000,1.000000
P313422,0.000000,1.000000,1.000000,0.000000,1.000000,0.000000,1.000000,0.000000,1.000000,0.000000,0.000000,1.000000,0.428571,0.571429,0.724797,0.275203,1.000000,0.000000,0.000000,1.000000
P313425,0.769231,0.230769,1.000000,0.000000,1.000000,0.000000,1.000000,0.000000,0.800000,0.200000,1.000000,0.000000,0.473684,0.526316,0.250000,0.750000,0.823529,0.176471,1.000000,0.000000


<h3>Clustering</h3>

<p>In this section we iterate over each feature grouping in the power set to see which ones give us the best clustering. The measurements we are using are the Elbow Method and the Silhouette Score. These metrics can tell us how many clusters work best for each feature group, if any at all</p>

In [43]:
def calculate_scores(tm_dist,feature_list,flist_str):
    list_scores = []
    c = 0
    for g in feature_list:
        c += 1
        print(str(c) + '. Working on: ' + str(g))
        diff = 0
        decel = 0
        for k in range(1,30):
            d = {}
            km = KMeans(n_clusters=k,max_iter=1000).fit(tm_dist[g])

            sil_score = np.nan
            if k > 1:
                sil_score = silhouette_score(tm_dist[g],labels=km.labels_)

            if k > 2:
                decel = km.inertia_ - inertia  - diff
            if k > 1:
                diff = km.inertia_ - inertia

            inertia = km.inertia_

            d = {'group': str(g),'group_len': len(g), 'k': k,'sil_score': sil_score,'dist': inertia,'diff':diff,'decel':decel}
            list_scores.append(d)

    df_scores = pd.DataFrame(list_scores)
    df_scores.to_csv('output/scores_' + flist_str + '.csv',encoding='utf-8',sep='\t')
    return df_scores

In [44]:
def export_plots(df_scores,feature_list,flist_str):
    c = 0
    for g in feature_list:
        c += 1
        print(str(c) + '. Plotting: ' + str(g))
        df_temp = df_scores[df_scores['group'] == str(g)]
        K = list(df_temp['k'])

        plt.figure(figsize=(12, 12), dpi=120, facecolor='w', edgecolor='k')

        plt.plot(K,df_temp['dist'])
        plt.xticks(K)
        plt.xlabel('k')
        plt.ylabel('Distortion')
        plt.title('Elbow Method: ' + str(g))
        plt.savefig('output/plots/elbow_' + flist_str + '_reg_' + str(c) + '.png')

        plt.clf()

In [45]:
#df_scores = calculate_scores(tm_dist_all,list_ortho_word_power,'ortho_word')
#export_plots(df_scores,list_ortho_word_power,'ortho_word')
#df_scores = calculate_scores(tm_dist_all,list_ortho_sign_power,'ortho_sign')
#export_plots(df_scores,list_ortho_sign_power,'ortho_sign')
#df_scores = calculate_scores(tm_dist_all,list_paleo_power,'paleo')
#export_plots(df_scores,list_paleo_power,'paleo')

<h3>Evaluation</h3>
<p>For this section we will calculate purity scores for the clusters according to three types of classifications: Sender location, dossier, and SAA chapter</p>

In [46]:
cat_file = codecs.open('sargonletters/catalogue.json','r','utf-8')
cat_json = json.load(cat_file)

class_l = []
class_index = []
for pnum in cat_json['members']:
    id_text        = cat_json['members'][pnum].get('id_text','')
    designation    = cat_json['members'][pnum].get('designation','')
    ancient_author = cat_json['members'][pnum].get('ancient_author','')
    dossier        = cat_json['members'][pnum].get('dossier','')
    dossier_nocertain = '.'.join(dossier.split('.')[0:3])
    saa_chap       = '.'.join(dossier.split('.')[0:2])
    senderloc     = cat_json['members'][pnum].get('senderloc','')
    class_d = {'designation': designation,'ancient_author':ancient_author,'dossier':dossier,'dossier_nocertain': dossier_nocertain, 'senderloc':senderloc,'saa_chap':saa_chap}
    class_index.append(id_text)
    class_l.append(class_d)
    
df_class = pd.DataFrame(class_l,index=class_index)
df_class

Unnamed: 0,ancient_author,designation,dossier,dossier_nocertain,saa_chap,senderloc
P224485,Sargon II,SAA 01 001,SAA01.01.01.a,SAA01.01.01,SAA01.01,Royal Court
P237089,Issar-duri,SAA 15 014,SAA15.01.01.a,SAA15.01.01,SAA15.01,Arrapha
P238649,(unknown),SAA 15 368,SAA15.09.07.c,SAA15.09.07,SAA15.09,uncertain
P313416,Sin-ašared,SAA 01 158,SAA01.07.15.c,SAA01.07.15,SAA01.07,Assyria
P313417,Mannu-ki-Aššur-le’i,SAA 01 233,SAA01.13.01.c,SAA01.13.01,SAA01.13,Guzana
P313419,[...]-ka’’in,SAA 05 040,SAA05.02.03.a,SAA05.02.03,SAA05.02,Tušhan
P313420,Il-yada’,SAA 15 164,SAA15.06.01.a,SAA15.06.01,SAA15.06,Dur-Kurigalzu
P313421,Šarru-emuranni,SAA 15 237,SAA15.07.01.a,SAA15.07.01,SAA15.07,Babylon
P313422,Gabbu-ana-Aššur,SAA 05 114,SAA05.07.01.c,SAA05.07.01,SAA05.07,Kurbail
P313425,Bel-liqbi,SAA 01 179,SAA01.08.03.c,SAA01.08.03,SAA01.08,Zobah


---PNUM DOSSIER---

In [86]:
def pnum_to_dossier(pnum):
    return df_class.loc[pnum]['dossier']

In [101]:
df['dossier'] = df['text_id'].apply(pnum_to_dossier)

In [399]:
df_words['dossier'] = df_words['text_id'].apply(pnum_to_dossier)

In [102]:
df_dossier_a = df[df['dossier'].str.contains('.a')]
df_dossier_a

Unnamed: 0,a,b,break,cf,epos,f,file,form,frag,gw,...,sign_loc_id,text_id,word_num,word_sign_tot,sign_form,mods_str,str_part,num_part,combined,dossier
0,,a,damaged,awātu,N,,P224485.json,a-bat,⸢a⸣-bat,word,...,P224485.2.1.0,P224485,1,2,A,..,a,1,A:..,SAA01.01.01.a
1,,bat,,awātu,N,,P224485.json,a-bat,⸢a⸣-bat,word,...,P224485.2.1.1,P224485,1,2,BAD,..,bat,1,BAD:..,SAA01.01.01.a
2,,LUGAL,,šarru,N,,P224485.json,LUGAL,LUGAL,king,...,P224485.2.2.0,P224485,2,1,LUGAL,..,LUGAL,1,LUGAL:..,SAA01.01.01.a
3,,a,,ana,PRP,,P224485.json,a-na,a-na\t,to,...,P224485.2.3.0,P224485,3,2,A,..,a,1,A:..,SAA01.01.01.a
4,,na,,ana,PRP,t,P224485.json,a-na,a-na\t,to,...,P224485.2.3.1,P224485,3,2,,.t.,na,1,NA:.t.,SAA01.01.01.a
5,,1(diš),,Aššur-šarru-uṣur,PN,,P224485.json,{1}aš-šur-MAN-PAB,{1}aš-šur-MAN-⸢PAB,1,...,P224485.2.4.0,P224485,4,4,DIŠ,..,1(diš),1,DIŠ:..,SAA01.01.01.a
6,,aš,,Aššur-šarru-uṣur,PN,,P224485.json,{1}aš-šur-MAN-PAB,{1}aš-šur-MAN-⸢PAB,1,...,P224485.2.4.1,P224485,4,4,AŠ,..,aš,1,AŠ:..,SAA01.01.01.a
7,,šur,,Aššur-šarru-uṣur,PN,,P224485.json,{1}aš-šur-MAN-PAB,{1}aš-šur-MAN-⸢PAB,1,...,P224485.2.4.2,P224485,4,4,SUR,..,šur,1,SUR:..,SAA01.01.01.a
8,,MAN,,Aššur-šarru-uṣur,PN,,P224485.json,{1}aš-šur-MAN-PAB,{1}aš-šur-MAN-⸢PAB,1,...,P224485.2.4.3,P224485,4,4,|U.U|,..,MAN,1,|U.U|:..,SAA01.01.01.a
9,,PAB,damaged,Aššur-šarru-uṣur,PN,,P224485.json,{1}aš-šur-MAN-PAB,{1}aš-šur-MAN-⸢PAB,1,...,P224485.2.4.4,P224485,4,4,PAP,..,PAB,1,PAP:..,SAA01.01.01.a


In [400]:
df_words_dossier_a = df_words[df_words['dossier'].str.contains('.a')]
df_words_dossier_a

Unnamed: 0,cf,epos,file,form,frag,gw,line_label,norm,pos,ref,sense,text_id,word_num,word_sign_tot,lemma,lemma_norm,lemma_norm_form,dossier
0,awātu,N,P224485.json,a-bat,⸢a⸣-bat,word,o 1,abat,N,P224485.2.1,word,P224485,1,2,awātu[word]N,awātu[word]N:abat,awātu[word]N:abat:a-bat,SAA01.01.01.a
1,šarru,N,P224485.json,LUGAL,LUGAL,king,o 1,šarri,N,P224485.2.2,king,P224485,2,1,šarru[king]N,šarru[king]N:šarri,šarru[king]N:šarri:LUGAL,SAA01.01.01.a
2,ana,PRP,P224485.json,a-na,a-na\t,to,o 1,ana,PRP,P224485.2.3,to,P224485,3,2,ana[to]PRP,ana[to]PRP:ana,ana[to]PRP:ana:a-na,SAA01.01.01.a
3,Aššur-šarru-uṣur,PN,P224485.json,{1}aš-šur-MAN-PAB,{1}aš-šur-MAN-⸢PAB,1,o 1,Aššur-šarru-uṣur,PN,P224485.2.4,1,P224485,4,4,Aššur-šarru-uṣur[1]PN,Aššur-šarru-uṣur[1]PN:Aššur-šarru-uṣur,Aššur-šarru-uṣur[1]PN:Aššur-šarru-uṣur:{1}aš-š...,SAA01.01.01.a
4,šulmu,N,P224485.json,šul-mu,šul⸣-mu,completeness,o 1,šulmu,N,P224485.2.5,health,P224485,5,2,šulmu[completeness]N,šulmu[completeness]N:šulmu,šulmu[completeness]N:šulmu:šul-mu,SAA01.01.01.a
5,yâšim,IP,P224485.json,ia-a-ši,ia-⸢a⸣-ši,to me,o 1,ayāši,IP,P224485.2.6,me,P224485,6,3,yâšim[to me]IP,yâšim[to me]IP:ayāši,yâšim[to me]IP:ayāši:ia-a-ši,SAA01.01.01.a
6,šulmu,N,P224485.json,šul-mu,⸢šul⸣-mu,completeness,o 2,šulmu,N,P224485.3.1,health,P224485,7,2,šulmu[completeness]N,šulmu[completeness]N:šulmu,šulmu[completeness]N:šulmu:šul-mu,SAA01.01.01.a
7,ana,PRP,P224485.json,a-na,a-na\t,to,o 2,ana,PRP,P224485.3.2,to,P224485,8,2,ana[to]PRP,ana[to]PRP:ana,ana[to]PRP:ana:a-na,SAA01.01.01.a
8,Mat-Aššur,GN,P224485.json,KUR-aš-šur{KI},KUR-aš-šur{ki},Assyria,o 2,Mat-Aššur,GN,P224485.3.3,Assyria,P224485,9,4,Mat-Aššur[Assyria]GN,Mat-Aššur[Assyria]GN:Mat-Aššur,Mat-Aššur[Assyria]GN:Mat-Aššur:KUR-aš-šur{KI},SAA01.01.01.a
9,libbu,N,P224485.json,ŠA₃-ka,⸢ŠA₃⸣-[ka],interior,o 2,libbaka,N,P224485.3.4,mood,P224485,10,2,libbu[interior]N,libbu[interior]N:libbaka,libbu[interior]N:libbaka:ŠA₃-ka,SAA01.01.01.a


In [147]:
#pnum_list_a = list(df_dossier_a[df_dossier_a['dossier'].isin(['SAA01.03.01.a','SAA01.04.01.a'])]['text_id'].unique())
pnum_list_a = list(df_dossier_a['text_id'].unique())
pnum_list_a

['P224485',
 'P237089',
 'P313419',
 'P313420',
 'P313421',
 'P313434',
 'P313435',
 'P313437',
 'P313439',
 'P313447',
 'P313448',
 'P313472',
 'P313478',
 'P313479',
 'P313504',
 'P313510',
 'P313515',
 'P313523',
 'P313540',
 'P313542',
 'P313551',
 'P313626',
 'P313630',
 'P313640',
 'P313750',
 'P313788',
 'P313874',
 'P313911',
 'P313926',
 'P314044',
 'P334020',
 'P334036',
 'P334037',
 'P334038',
 'P334039',
 'P334040',
 'P334041',
 'P334042',
 'P334043',
 'P334044',
 'P334045',
 'P334046',
 'P334047',
 'P334048',
 'P334049',
 'P334050',
 'P334051',
 'P334053',
 'P334054',
 'P334055',
 'P334060',
 'P334069',
 'P334070',
 'P334071',
 'P334072',
 'P334073',
 'P334074',
 'P334075',
 'P334076',
 'P334077',
 'P334078',
 'P334079',
 'P334080',
 'P334081',
 'P334083',
 'P334084',
 'P334090',
 'P334091',
 'P334092',
 'P334094',
 'P334097',
 'P334099',
 'P334100',
 'P334101',
 'P334102',
 'P334103',
 'P334104',
 'P334105',
 'P334106',
 'P334108',
 'P334109',
 'P334111',
 'P334113',
 'P3

In [190]:
df_opening_counts = pd.read_csv('opening_counts.csv',encoding='utf-8')
df_opening_counts = df_opening_counts.fillna(0)
df_opening_counts = df_opening_counts.drop_duplicates()
df_opening_counts = df_opening_counts.set_index('id_text')
df_opening_counts

Unnamed: 0_level_0,nos_words
id_text,Unnamed: 1_level_1
P334656,0.0
P334282,0.0
P334723,0.0
P334210,0.0
P334357,0.0
P334209,0.0
X900010,0.0
P334222,0.0
P334361,0.0
X900005,0.0


In [191]:
df_opening_a = pd.DataFrame()
for pnum in pnum_list_a:
    try:
        #print('Good: '+pnum)
        df_pnum = df_dossier_a[(df_dossier_a['text_id'] == pnum) & (df_dossier_a['word_num'] <= df_opening_counts.loc[pnum]['nos_words'])]
        df_opening_a = pd.concat([df_opening_a,df_pnum])
    except KeyError:
        #print(pnum)
        continue
df_opening_a.to_csv('output/opening_signs_sargon.csv',encoding='utf-8')

In [275]:
df_opening_a

Unnamed: 0,a,b,break,cf,epos,f,file,form,frag,gw,...,sign_loc_id,text_id,word_num,word_sign_tot,sign_form,mods_str,str_part,num_part,combined,dossier
1123,,a,,ana,PRP,,P237089.json,a-na,a-na,to,...,P237089.2.1.0,P237089,1,2,A,..,a,1,A:..,SAA15.01.01.a
1124,,na,,ana,PRP,,P237089.json,a-na,a-na,to,...,P237089.2.1.1,P237089,1,2,,..,na,1,NA:..,SAA15.01.01.a
1125,,LUGAL,,šarru,N,,P237089.json,LUGAL,LUGAL,king,...,P237089.2.2.0,P237089,2,1,LUGAL,..,LUGAL,1,LUGAL:..,SAA15.01.01.a
1126,,be,,bēlu,N,,P237089.json,be-li₂-ia₂,be-li₂-ia₂,lord,...,P237089.2.3.0,P237089,3,3,BAD,..,be,1,BAD:..,SAA15.01.01.a
1127,,li₂,,bēlu,N,,P237089.json,be-li₂-ia₂,be-li₂-ia₂,lord,...,P237089.2.3.1,P237089,3,3,NI,..,li,2,NI:..,SAA15.01.01.a
1128,,ia₂,,bēlu,N,,P237089.json,be-li₂-ia₂,be-li₂-ia₂,lord,...,P237089.2.3.2,P237089,3,3,5(DIŠ),..,ia,2,5(DIŠ):..,SAA15.01.01.a
1129,,ARAD,,ardu,N,,P237089.json,ARAD-ka,ARAD-ka,slave,...,P237089.3.1.0,P237089,4,2,ARAD,..,ARAD,1,ARAD:..,SAA15.01.01.a
1130,,ka,,ardu,N,,P237089.json,ARAD-ka,ARAD-ka,slave,...,P237089.3.1.1,P237089,4,2,KA,..,ka,1,KA:..,SAA15.01.01.a
1131,,1(diš),,Issar-duri,PN,,P237089.json,{1}{d}15-du-ri,{1}{d}15-du-ri,1,...,P237089.3.2.0,P237089,5,5,DIŠ,..,1(diš),1,DIŠ:..,SAA15.01.01.a
1132,,d,,Issar-duri,PN,,P237089.json,{1}{d}15-du-ri,{1}{d}15-du-ri,1,...,P237089.3.2.1,P237089,5,5,AN,..,d,1,AN:..,SAA15.01.01.a


In [223]:
len(list(df_opening_a['text_id'].unique()))

164

In [216]:
df_opening_a_2 = df_opening_a[df_opening_a['dossier'].isin(['SAA01.03.01.a','SAA01.04.01.a'])]

In [221]:
df_dist_paleo = pd.DataFrame(df_opening_a.groupby(['sign_form','combined']).apply(lambda row: ','.join(row['sign_loc_id'] + ' (' + row['line_label'] + ')'))).reset_index() #.to_csv('output/dist_ortho_sign_opening.csv',encoding='utf-8')

df_dist_paleo.columns = ['sign_form','combined','all_locs']
df_dist_paleo['count'] = df_dist_paleo['all_locs'].apply(lambda x: len(x.split(',')))
df_dist_paleo['is_dup'] = df_dist_paleo.duplicated(['sign_form'],False)
df_dist_paleo = df_dist_paleo[df_dist_paleo['is_dup'] == True]
df_dist_paleo.to_csv('output/dist_paleo_opening.csv',encoding='utf-8')
df_dist_paleo

Unnamed: 0,sign_form,combined,all_locs,count,is_dup
3,AK,AK:..,"P334078.3.2.2 (o 2),P334078.5.1.1 (o 4),P33407...",12,True
4,AK,AK:.d.,P313435.3.2.2 (o 2),1,True
5,AK,AK:.td.,P334665.3.2.5 (o 2),1,True
9,ARAD,ARAD:..,"P237089.3.1.0 (o 2),P313419.3.1.0 (o 2),P31342...",162,True
10,ARAD,ARAD:.p.,"P313421.3.1.0 (o 2),P313504.3.1.0 (o 2),P31364...",3,True
17,BU,BU:..,"P313435.6.4.2 (o 5),P313472.3.2.6 (o 2),P31351...",28,True
18,BU,BU:.t.,"P313420.7.4.1 (o 6),P334271.6.2.2 (o 5),P33433...",4,True
19,BU,BU:.tdp.,P334665.8.1.2 (o 7),1,True
20,BU,BU:.tp.,P334037.6.1.2 (o 5),1,True
21,DA,DA:..,"P313420.3.2.3 (o 2),P334343.3.2.3 (o 2)",2,True


In [333]:
sign_list = ['AG','ARAD','DI','E₂','LU₂','|ME.U.U.U|','MU','NI','NUN','RU','ŠA','ŠA₃','TI','U₂','URU','KA','LU']
df_special_signs_open = pd.DataFrame(df_opening_a[(df_opening_a['sign_form'].isin(sign_list)) &
                                                 (df_opening_a['mods_str'] != '..')].groupby('text_id').agg('count')['a'])
df_special_sort = df_special_signs_open.sort_values(by='a',ascending=False).head(5)
df_special_sort

Unnamed: 0_level_0,a
text_id,Unnamed: 1_level_1
P334080,5
P334534,5
P334505,4
P334091,4
P334092,4


Compare Openings to Rest of Text

In [315]:
list_pnums_open = list(df_opening_a['text_id'].unique())
sign_loc_list = list(df_opening_a['sign_loc_id'])
df_nonopen_a = df_dossier_a[~(df_dossier_a['sign_loc_id'].isin(sign_loc_list)) & (df_dossier_a['text_id'].isin(list_pnums_open))]
df_nonopen_a

Unnamed: 0,a,b,break,cf,epos,f,file,form,frag,gw,...,sign_loc_id,text_id,word_num,word_sign_tot,sign_form,mods_str,str_part,num_part,combined,dossier
1145,,ša,missing,ša,REL,,P237089.json,ša,[ša],that,...,P237089.5.1.0,P237089,11,1,ŠA,..,ša,1,ŠA:..,SAA15.01.01.a
1146,,LUGAL,,šarru,N,,P237089.json,LUGAL,LUGAL,king,...,P237089.5.2.0,P237089,12,1,LUGAL,..,LUGAL,1,LUGAL:..,SAA15.01.01.a
1147,,be,,bēlu,N,,P237089.json,be-li₂,be-⸢li₂⸣,lord,...,P237089.5.3.0,P237089,13,2,BAD,..,be,1,BAD:..,SAA15.01.01.a
1148,,li₂,damaged,bēlu,N,,P237089.json,be-li₂,be-⸢li₂⸣,lord,...,P237089.5.3.1,P237089,13,2,NI,..,li,2,NI:..,SAA15.01.01.a
1149,,iš,missing,šapāru,V,,P237089.json,iš-pur-an-ni,[iš]-⸢pur⸣-an-ni,send,...,P237089.5.4.0,P237089,14,4,IŠ,..,iš,1,IŠ:..,SAA15.01.01.a
1150,,pur,damaged,šapāru,V,,P237089.json,iš-pur-an-ni,[iš]-⸢pur⸣-an-ni,send,...,P237089.5.4.1,P237089,14,4,BUR,..,pur,1,BUR:..,SAA15.01.01.a
1151,,an,,šapāru,V,,P237089.json,iš-pur-an-ni,[iš]-⸢pur⸣-an-ni,send,...,P237089.5.4.2,P237089,14,4,AN,..,an,1,AN:..,SAA15.01.01.a
1152,,ni,,šapāru,V,,P237089.json,iš-pur-an-ni,[iš]-⸢pur⸣-an-ni,send,...,P237089.5.4.3,P237089,14,4,NI,..,ni,1,NI:..,SAA15.01.01.a
1153,,ma,missing,mā,PRP,,P237089.json,ma-a,[ma-a,saying,...,P237089.6.1.0,P237089,15,2,MA,..,ma,1,MA:..,SAA15.01.01.a
1154,,a,missing,mā,PRP,,P237089.json,ma-a,[ma-a,saying,...,P237089.6.1.1,P237089,15,2,A,..,a,1,A:..,SAA15.01.01.a


In [372]:
df_open_std = df_opening_a[(df_opening_a['sign_form'].isin(sign_list)) & (df_opening_a['mods_str'] == '..')]
df_open_nstd = df_opening_a[(df_opening_a['sign_form'].isin(sign_list)) & (df_opening_a['mods_str'] != '..')]

df_nonopen_std = df_nonopen_a[(df_nonopen_a['sign_form'].isin(sign_list)) & (df_nonopen_a['mods_str'] == '..')]
df_nonopen_nstd = df_nonopen_a[(df_nonopen_a['sign_form'].isin(sign_list)) & (df_nonopen_a['mods_str'] != '..')]

In [381]:
df_open_std_counts = pd.DataFrame(df_open_std.groupby(['text_id','sign_form']).agg('count')['a'])
df_open_nstd_counts = pd.DataFrame(df_open_nstd.groupby(['text_id','sign_form']).agg('count')['m'])
df_nonopen_std_counts = pd.DataFrame(df_nonopen_std.groupby(['text_id','sign_form']).agg('count')['f'])
df_nonopen_nstd_counts = pd.DataFrame(df_nonopen_nstd.groupby(['text_id','sign_form']).agg('count')['b'])

df_open_counts_all = pd.concat([df_open_std_counts,df_open_nstd_counts,df_nonopen_std_counts,df_nonopen_nstd_counts],axis=1)
df_open_counts_all = df_open_counts_all.fillna(0).reset_index()
df_open_counts_all

Unnamed: 0,text_id,sign_form,a,m,f,b
0,P237089,ARAD,1.0,0.0,0.0,0.0
1,P237089,DI,1.0,0.0,0.0,0.0
2,P237089,KA,1.0,0.0,0.0,0.0
3,P237089,LU,1.0,0.0,2.0,0.0
4,P237089,LU₂,0.0,0.0,0.0,1.0
5,P237089,MU,1.0,0.0,0.0,0.0
6,P237089,NI,1.0,0.0,4.0,0.0
7,P237089,TI,0.0,0.0,0.0,1.0
8,P237089,URU,0.0,0.0,1.0,0.0
9,P237089,ŠA,0.0,0.0,2.0,1.0


In [384]:
d = {'n0':0,'n1':0,'n2':0,'n3':0}
for i, row in df_open_counts_all.iterrows():
    if row['a'] > 0.0 and row['m'] > 0.0:
        d['n3'] += 1
    elif row['a'] == 0.0 and row['m'] == 0.0:
        d['n0'] += 1
    elif row['f'] == 0.0 and row['b'] == 0.0:
        d['n0'] += 1
    elif row['a'] > 0 and row['m'] == 0.0 and row['f'] > 0 and row['b'] == 0.0:
        d['n1'] += 1
    elif row['a'] == 0.0 and row['m'] > 0.0 and row['f'] == 0.0 and row['b'] > 0.0:
        d['n1'] += 1
    else:
        d['n2'] += 1
d

{'n0': 1412, 'n1': 454, 'n2': 75, 'n3': 14}

In [390]:
print('Percent of signs forms consistently represented between opening formula and rest of letter:')
print("{0:.2%}".format(d['n1'] / (d['n1'] + d['n2'])))
print('Percent of sign forms INconsistently represented between opening formula and rest of letter:')
print("{0:.2%}".format(d['n2'] / (d['n1'] + d['n2'])))
print('Percent of sign forms INconsistently represented in opening formula itself:')
print("{0:.2%}".format(d['n3'] / (d['n1'] + d['n2'] + d['n3'])))

Percent of signs forms consistently represented between opening formula and rest of letter:
85.82%
Percent of sign forms INconsistently represented between opening formula and rest of letter:
14.18%
Percent of sign forms INconsistently represented in opening formula itself:
2.58%


Same for word usage

In [403]:
df_words_opening_a = pd.DataFrame()
df_words_nonopen_a = pd.DataFrame()
for pnum in pnum_list_a:
    try:
        #print('Good: '+pnum)
        df_pnum = df_words_dossier_a[(df_words_dossier_a['text_id'] == pnum) & (df_words_dossier_a['word_num'] <= df_opening_counts.loc[pnum]['nos_words'])]
        df_words_opening_a = pd.concat([df_words_opening_a,df_pnum])
        
        df_pnum = df_words_dossier_a[(df_words_dossier_a['text_id'] == pnum) & (df_words_dossier_a['word_num'] > df_opening_counts.loc[pnum]['nos_words'])]
        df_words_nonopen_a = pd.concat([df_words_nonopen_a,df_pnum])
    except KeyError:
        #print(pnum)
        continue
#df_words_opening_a
df_words_nonopen_a = df_words_nonopen_a[df_words_nonopen_a['text_id'].isin(list(df_words_opening_a['text_id'].unique()))]
df_words_nonopen_a

Unnamed: 0,cf,epos,file,form,frag,gw,line_label,norm,pos,ref,sense,text_id,word_num,word_sign_tot,lemma,lemma_norm,lemma_norm_form,dossier
443,ša,REL,P237089.json,ša,[ša],that,o 4,ša,REL,P237089.5.1,what,P237089,11,1,ša[that]REL,ša[that]REL:ša,ša[that]REL:ša:ša,SAA15.01.01.a
444,šarru,N,P237089.json,LUGAL,LUGAL,king,o 4,šarru,N,P237089.5.2,king,P237089,12,1,šarru[king]N,šarru[king]N:šarru,šarru[king]N:šarru:LUGAL,SAA15.01.01.a
445,bēlu,N,P237089.json,be-li₂,be-⸢li₂⸣,lord,o 4,bēlī,N,P237089.5.3,lord,P237089,13,2,bēlu[lord]N,bēlu[lord]N:bēlī,bēlu[lord]N:bēlī:be-li₂,SAA15.01.01.a
446,šapāru,V,P237089.json,iš-pur-an-ni,[iš]-⸢pur⸣-an-ni,send,o 4,išpuranni,V,P237089.5.4,write,P237089,14,4,šapāru[send]V,šapāru[send]V:išpuranni,šapāru[send]V:išpuranni:iš-pur-an-ni,SAA15.01.01.a
447,mā,PRP,P237089.json,ma-a,[ma-a,saying,o 5,mā,PRP,P237089.6.1,saying,P237089,15,2,mā[saying]PRP,mā[saying]PRP:mā,mā[saying]PRP:mā:ma-a,SAA15.01.01.a
450,Ituʾaya,EN,P237089.json,{LU₂}i-tu₂-a.a,{⸢lu₂v⸣}i-[tu₂-a.a],Ituʾean,o 5,Ituʾaya,EN,P237089.6.4,Ituʾean,P237089,18,4,Ituʾaya[Ituʾean]EN,Ituʾaya[Ituʾean]EN:Ituʾaya,Ituʾaya[Ituʾean]EN:Ituʾaya:{LU₂}i-tu₂-a.a,SAA15.01.01.a
451,ša,REL,P237089.json,ša,ša,that,o 5,ša,REL,P237089.6.5,who,P237089,19,1,ša[that]REL,ša[that]REL:ša,ša[that]REL:ša:ša,SAA15.01.01.a
452,dullu,N,P237089.json,dul₆-lu,dul₆-lu,trouble,o 5,dullu,N,P237089.6.6,work,P237089,20,2,dullu[trouble]N,dullu[trouble]N:dullu,dullu[trouble]N:dullu:dul₆-lu,SAA15.01.01.a
453,ina,PRP,P237089.json,ina,[ina,in,o 6,ina,PRP,P237089.7.1,in,P237089,21,1,ina[in]PRP,ina[in]PRP:ina,ina[in]PRP:ina:ina,SAA15.01.01.a
454,Dur-Šarruken,GN,P237089.json,{URU}BAD₃-MAN-GIN,{uru}BAD₃-MAN]-⸢GIN⸣,1,o 6,Dur-Šarruken,GN,P237089.7.2,1,P237089,22,1,Dur-Šarruken[1]GN,Dur-Šarruken[1]GN:Dur-Šarruken,Dur-Šarruken[1]GN:Dur-Šarruken:{URU}BAD₃-MAN-GIN,SAA15.01.01.a


In [428]:
word_list = ['šulmu[completeness]N:šulmu','bēlu[lord]N:bēlīya','bēlu[lord]N:bēlī','lū[may]MOD:lū','šarru[king]N:šarru','šarru[king]N:šarri']

In [429]:
df_words_open_std = df_words_opening_a[(df_words_opening_a['lemma_norm'].isin(word_list))]
#df_open_nstd = df_words_opening_a[(df_words_opening_a['lemma_norm'].isin(sign_list)) & (df_words_opening_a['mods_str'] != '..')]

df_words_nonopen_std = df_words_nonopen_a[(df_words_nonopen_a['lemma_norm'].isin(word_list))]
#df_nonopen_nstd = df_words_nonopen_a[(df_words_nonopen_a['lemma_norm'].isin(sign_list)) & (df_words_nonopen_a['mods_str'] != '..')]

#df_words_open_std

In [430]:
df_words_open_std_counts = pd.DataFrame(df_words_open_std.groupby(['text_id','lemma_norm','lemma_norm_form']).agg('count')['cf'])
df_words_nonopen_std_counts = pd.DataFrame(df_words_nonopen_std.groupby(['text_id','lemma_norm','lemma_norm_form']).agg('count')['gw'])
#df_words_open_std_counts

In [431]:
df_words_opening_counts_all = pd.concat([df_words_open_std_counts,df_words_nonopen_std_counts],axis=1)
df_words_opening_counts_all.fillna(0).to_csv('test.csv',encoding='utf-16',sep='\t')

PermissionError: [Errno 13] Permission denied: 'test.csv'

In [432]:
df_words_opening_notext = pd.DataFrame(df_words_open_std.groupby(['lemma_norm','lemma_norm_form']).agg('count')['cf'])
df_words_nonopen_notext = pd.DataFrame(df_words_nonopen_std.groupby(['lemma_norm','lemma_norm_form']).agg('count')['gw'])
df_words_opening_notext_all = pd.concat([df_words_opening_notext,df_words_nonopen_notext],axis=1)
df_words_opening_notext_all

Unnamed: 0_level_0,Unnamed: 1_level_0,cf,gw
lemma_norm,lemma_norm_form,Unnamed: 2_level_1,Unnamed: 3_level_1
bēlu[lord]N:bēlī,bēlu[lord]N:bēlī:BE-i,,1
bēlu[lord]N:bēlī,bēlu[lord]N:bēlī:EN,,53
bēlu[lord]N:bēlī,bēlu[lord]N:bēlī:be-li,,7
bēlu[lord]N:bēlī,bēlu[lord]N:bēlī:be-li₂,,158
bēlu[lord]N:bēlī,bēlu[lord]N:bēlī:be-li₂-i,,1
bēlu[lord]N:bēlīya,bēlu[lord]N:bēlīya:BE-ia,4.0,2
bēlu[lord]N:bēlīya,bēlu[lord]N:bēlīya:BE-ia₂,,1
bēlu[lord]N:bēlīya,bēlu[lord]N:bēlīya:EN-a,18.0,7
bēlu[lord]N:bēlīya,bēlu[lord]N:bēlīya:EN-ia,172.0,50
bēlu[lord]N:bēlīya,bēlu[lord]N:bēlīya:EN-ia₂,56.0,32


In [448]:
print('LUGAL appear ' + str(df_words[df_words['form'] == 'LUGAL'].shape[0]) + ' times (by itself)')
print('MAN appears ' + str(df_words[df_words['form'] == 'MAN'].shape[0]) + ' times (by itself)')

LUGAL appear 2753 times (by itself)
MAN appears 27 times (by itself)


In [451]:
df_MAN = pd.DataFrame(df_words[(df_words['form'].str.contains('MAN')) & (df_words['lemma'] == 'šarru[king]N')].groupby('text_id').agg('count')['cf'])
df_MAN

Unnamed: 0_level_0,cf
text_id,Unnamed: 1_level_1
P224485,1
P313453,1
P313480,2
P313814,1
P314321,1
P334113,3
P334158,1
P334179,6
P334201,1
P334360,1


In [452]:
pd.concat([df_class.loc[list(df_MAN.index)],df_MAN],axis=1)

Unnamed: 0_level_0,ancient_author,designation,dossier,dossier_nocertain,saa_chap,senderloc,cf
text_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
P224485,Sargon II,SAA 01 001,SAA01.01.01.a,SAA01.01.01,SAA01.01,Royal Court,1
P313453,Ṭab-ṣill-Ešarra,SAA 05 291,SAA05.14.01.c,SAA05.14.01,SAA05.14,Ashur,1
P313480,Nashir-Bel (Liphur-Bel),SAA 05 015,SAA05.01.01.b,SAA05.01.01,SAA05.01,Amidi,2
P313814,Nabu-duru-uṣur,SAA 15 129,SAA15.05.02.c,SAA15.05.02,SAA15.05,Der,1
P314321,(unknown),SAA 05 189,SAA05.09.01.c,SAA05.09.01,SAA05.09,Northeastern Assyria,1
P334113,Ina-šar-Bel-allak,SAA 01 128,SAA01.06.02.a,SAA01.06.02,SAA01.06,Dur-Sharruken,3
P334158,Hunni,SAA 01 133,SAA01.07.01.a,SAA01.07.01,SAA01.07,Nineveh,1
P334179,Aššur-belu-uda’’an,SAA 05 126,SAA05.07.02.a,SAA05.07.02,SAA05.07,Kurbail,6
P334201,Šarru-emuranni,SAA 15 217,SAA15.07.01.a,SAA15.07.01,SAA15.07,Babylon,1
P334360,Sin-ila’i,SAA 15 241,SAA15.07.03.a,SAA15.07.03,SAA15.07,Central or Southern Babylonia,1


Information for sender location

In [47]:
df_senderloc_certain = df_class[(df_class['senderloc'] != 'uncertain')]

list_senderloc_certain = list(df_senderloc_certain.index)

senderloc_tot = len(df_senderloc_certain['senderloc'].unique())
senderloc_tot

tm_senderloc_certain = tm_dist_all.loc[list(set(list_senderloc_certain) & set(list(tm_dist_all.index)))]
tm_senderloc_certain

Unnamed: 0,NA:..,NA:.t.,NI:..,NI:.d.,bēlu[lord]N:bēlīya:EN-ia,bēlu[lord]N:bēlīya:be-li₂-ia,ia,ia₂,lā[not]MOD:lā:la,lā[not]MOD:lā:la-a,lū[may]MOD:lū:lu,lū[may]MOD:lū:lu-u,u,u₂,|ME.U.U.U|:..,|ME.U.U.U|:.m.,ša,ša₂,šu,šu₂
P313635,0.000000,1.000000,1.000000,0.000000,0.713624,0.286376,0.802607,0.197393,1.000000,0.000000,0.730878,0.269122,0.449193,0.550807,0.724797,0.275203,0.865585,0.134415,0.500000,0.500000
P313609,1.000000,0.000000,1.000000,0.000000,0.713624,0.286376,1.000000,0.000000,0.841892,0.158108,0.730878,0.269122,0.666667,0.333333,0.724797,0.275203,1.000000,0.000000,0.000000,1.000000
P313425,0.769231,0.230769,1.000000,0.000000,1.000000,0.000000,1.000000,0.000000,0.800000,0.200000,1.000000,0.000000,0.473684,0.526316,0.250000,0.750000,0.823529,0.176471,1.000000,0.000000
P313995,0.852654,0.147346,1.000000,0.000000,0.713624,0.286376,0.802607,0.197393,1.000000,0.000000,0.730878,0.269122,1.000000,0.000000,0.724797,0.275203,1.000000,0.000000,0.302184,0.697816
P334210,1.000000,0.000000,1.000000,0.000000,0.000000,1.000000,1.000000,0.000000,0.841892,0.158108,1.000000,0.000000,0.500000,0.500000,1.000000,0.000000,1.000000,0.000000,1.000000,0.000000
P334305,0.333333,0.666667,1.000000,0.000000,1.000000,0.000000,1.000000,0.000000,0.841892,0.158108,1.000000,0.000000,0.200000,0.800000,0.000000,1.000000,0.714286,0.285714,1.000000,0.000000
P313483,1.000000,0.000000,1.000000,0.000000,0.000000,1.000000,0.800000,0.200000,0.841892,0.158108,0.500000,0.500000,0.222222,0.777778,0.724797,0.275203,1.000000,0.000000,0.333333,0.666667
P334783,0.555556,0.444444,1.000000,0.000000,1.000000,0.000000,1.000000,0.000000,0.500000,0.500000,0.500000,0.500000,0.600000,0.400000,0.666667,0.333333,0.600000,0.400000,0.333333,0.666667
P334139,1.000000,0.000000,1.000000,0.000000,0.333333,0.666667,1.000000,0.000000,1.000000,0.000000,1.000000,0.000000,0.500000,0.500000,1.000000,0.000000,0.500000,0.500000,0.375000,0.625000
P334390,0.413793,0.586207,1.000000,0.000000,0.333333,0.666667,1.000000,0.000000,0.841892,0.158108,1.000000,0.000000,0.400000,0.600000,0.961538,0.038462,0.750000,0.250000,0.302184,0.697816


Information for dossiers

In [48]:
df_dossier_certain_a = df_class[df_class['dossier'].str.contains('.a')]
df_dossier_certain_b = df_class[df_class['dossier'].str.contains('.b')]
df_dossier_certain_c = df_class[df_class['dossier'].str.contains('.c')]

list_dossier_certain_a = list(df_dossier_certain_a.index)
list_dossier_certain_b = list(df_dossier_certain_b.index)
list_dossier_certain_c = list(df_dossier_certain_c.index)

dossier_certain_a_tot = len(df_dossier_certain_a['dossier'].unique())
dossier_certain_b_tot = len(df_dossier_certain_b['dossier'].unique())
dossier_certain_c_tot = len(df_dossier_certain_c['dossier'].unique())

print(str(dossier_certain_a_tot) + ' unique dossiers in ' + str(len(list_dossier_certain_a)) + ' texts')
print(str(dossier_certain_b_tot) + ' unique dossiers in ' + str(len(list_dossier_certain_b)) + ' texts')
print(str(dossier_certain_c_tot) + ' unique dossiers in ' + str(len(list_dossier_certain_c)) + ' texts')
print(str(len(df_class['dossier_nocertain'].unique())))

106 unique dossiers in 276 texts
44 unique dossiers in 69 texts
93 unique dossiers in 609 texts
161


In [49]:
tm_dossier = tm_dist_all.loc[list(set(df_class.index) & set(list(tm_dist_all.index)))]
tm_dossier

Unnamed: 0,NA:..,NA:.t.,NI:..,NI:.d.,bēlu[lord]N:bēlīya:EN-ia,bēlu[lord]N:bēlīya:be-li₂-ia,ia,ia₂,lā[not]MOD:lā:la,lā[not]MOD:lā:la-a,lū[may]MOD:lū:lu,lū[may]MOD:lū:lu-u,u,u₂,|ME.U.U.U|:..,|ME.U.U.U|:.m.,ša,ša₂,šu,šu₂
P313635,0.000000,1.000000,1.000000,0.000000,0.713624,0.286376,0.802607,0.197393,1.000000,0.000000,0.730878,0.269122,0.449193,0.550807,0.724797,0.275203,0.865585,0.134415,0.500000,0.500000
P313609,1.000000,0.000000,1.000000,0.000000,0.713624,0.286376,1.000000,0.000000,0.841892,0.158108,0.730878,0.269122,0.666667,0.333333,0.724797,0.275203,1.000000,0.000000,0.000000,1.000000
P313425,0.769231,0.230769,1.000000,0.000000,1.000000,0.000000,1.000000,0.000000,0.800000,0.200000,1.000000,0.000000,0.473684,0.526316,0.250000,0.750000,0.823529,0.176471,1.000000,0.000000
P313995,0.852654,0.147346,1.000000,0.000000,0.713624,0.286376,0.802607,0.197393,1.000000,0.000000,0.730878,0.269122,1.000000,0.000000,0.724797,0.275203,1.000000,0.000000,0.302184,0.697816
P334210,1.000000,0.000000,1.000000,0.000000,0.000000,1.000000,1.000000,0.000000,0.841892,0.158108,1.000000,0.000000,0.500000,0.500000,1.000000,0.000000,1.000000,0.000000,1.000000,0.000000
P334305,0.333333,0.666667,1.000000,0.000000,1.000000,0.000000,1.000000,0.000000,0.841892,0.158108,1.000000,0.000000,0.200000,0.800000,0.000000,1.000000,0.714286,0.285714,1.000000,0.000000
P313483,1.000000,0.000000,1.000000,0.000000,0.000000,1.000000,0.800000,0.200000,0.841892,0.158108,0.500000,0.500000,0.222222,0.777778,0.724797,0.275203,1.000000,0.000000,0.333333,0.666667
P334783,0.555556,0.444444,1.000000,0.000000,1.000000,0.000000,1.000000,0.000000,0.500000,0.500000,0.500000,0.500000,0.600000,0.400000,0.666667,0.333333,0.600000,0.400000,0.333333,0.666667
P334139,1.000000,0.000000,1.000000,0.000000,0.333333,0.666667,1.000000,0.000000,1.000000,0.000000,1.000000,0.000000,0.500000,0.500000,1.000000,0.000000,0.500000,0.500000,0.375000,0.625000
P334390,0.413793,0.586207,1.000000,0.000000,0.333333,0.666667,1.000000,0.000000,0.841892,0.158108,1.000000,0.000000,0.400000,0.600000,0.961538,0.038462,0.750000,0.250000,0.302184,0.697816


Information for SAA chapters

In [50]:
saachap_tot = len(df_class['saa_chap'].unique())
saachap_tot

tm_saachap = tm_dist_all.loc[list(set(df_class.index) & set(list(tm_dist_all.index)))]
#tm_saachap
saachap_tot

37

Define the function for evaluation

In [51]:
from random import shuffle
def calculate_purity_score(dict_class,n):
    purity_score = 0
    for s in dict_class:
        cnt = Counter(dict_class[s])
        purity_score += cnt.most_common()[0][1]

    purity_score = purity_score / n
    return purity_score    

def labels_to_dict(labels,pnums):
    d = {}
    for i in range(len(labels)):
        d[pnums[i]] = labels[i]
    return d

In [304]:
def evaluate_clusters(tm_class,feature_list,flist_str,class_type,K,df_cl):
    list_evals = []
    c = 0
    for g in feature_list:
        c += 1
        print(str(c) + '. Evaluating: ' + str(g))
        km = KMeans(n_clusters=K,max_iter=1000).fit(tm_class[g])
        labels_random = km.labels_.copy()
        shuffle(labels_random)

        #Create Dictionary of cluster => list of pnums
        clustered = {}
        clustered_random = {}
        for i in range(len(km.labels_)):
            if km.labels_[i] in clustered:
                clustered[km.labels_[i]].append(df_cl.loc[tm_class.index[i]][class_type])
            else:
                clustered[km.labels_[i]] = [df_cl.loc[tm_class.index[i]][class_type]]
                
            if labels_random[i] in clustered_random:
                clustered_random[labels_random[i]].append(df_cl.loc[tm_class.index[i]][class_type])
            else:
                clustered_random[labels_random[i]] = [df_cl.loc[tm_class.index[i]][class_type]]
        
        #Create Dictionary of Pnum => cluster
        pnum_clusters = labels_to_dict(km.labels_,tm_class.index)
                
        #purity score
        purity_score_class = calculate_purity_score(clustered,len(tm_class.index))
        purity_score_random = calculate_purity_score(clustered_random,len(tm_class.index))
        
        d = {'group':g,
             'purity_class': purity_score_class,
             'purity_random': purity_score_random,
             'group_len': len(g),
             'class_type': class_type,
             'feature_type': flist_str,
             'num_clusters': K}
        list_evals.append(d)

    df_evals = pd.DataFrame(list_evals)
    #Error columns
    df_evals['error_sub'] = df_evals['purity_class'] - df_evals['purity_random']
    df_evals['error_div'] = df_evals['purity_class'] / df_evals['purity_random']
    df_evals.to_csv('output/evaluation_' + class_type + '_' + flist_str + '.csv',encoding='utf-8')
    return df_evals

In [56]:
df_evals_sender = evaluate_clusters(tm_senderloc_certain,list_power,'allpair','senderloc',senderloc_tot)
#df_evals_dossier = evaluate_clusters(tm_dossier,list_power,'allpair','dossier',len(df_class['dossier_nocertain'].unique()))
#df_evals_saachap = evaluate_clusters(tm_saachap,list_power,'allpair','saa_chap',saachap_tot)

1. Evaluating: ['NA:..', 'NA:.t.']
2. Evaluating: ['NI:..', 'NI:.d.']
3. Evaluating: ['NA:..', 'NA:.t.', 'NI:..', 'NI:.d.']
4. Evaluating: ['|ME.U.U.U|:..', '|ME.U.U.U|:.m.']
5. Evaluating: ['NA:..', 'NA:.t.', '|ME.U.U.U|:..', '|ME.U.U.U|:.m.']
6. Evaluating: ['NI:..', 'NI:.d.', '|ME.U.U.U|:..', '|ME.U.U.U|:.m.']
7. Evaluating: ['NA:..', 'NA:.t.', 'NI:..', 'NI:.d.', '|ME.U.U.U|:..', '|ME.U.U.U|:.m.']
8. Evaluating: ['ia', 'ia₂']
9. Evaluating: ['NA:..', 'NA:.t.', 'ia', 'ia₂']
10. Evaluating: ['NI:..', 'NI:.d.', 'ia', 'ia₂']
11. Evaluating: ['NA:..', 'NA:.t.', 'NI:..', 'NI:.d.', 'ia', 'ia₂']
12. Evaluating: ['|ME.U.U.U|:..', '|ME.U.U.U|:.m.', 'ia', 'ia₂']
13. Evaluating: ['NA:..', 'NA:.t.', '|ME.U.U.U|:..', '|ME.U.U.U|:.m.', 'ia', 'ia₂']
14. Evaluating: ['NI:..', 'NI:.d.', '|ME.U.U.U|:..', '|ME.U.U.U|:.m.', 'ia', 'ia₂']
15. Evaluating: ['NA:..', 'NA:.t.', 'NI:..', 'NI:.d.', '|ME.U.U.U|:..', '|ME.U.U.U|:.m.', 'ia', 'ia₂']
16. Evaluating: ['ša', 'ša₂']
17. Evaluating: ['NA:..', 'NA:.t.', 

110. Evaluating: ['NI:..', 'NI:.d.', '|ME.U.U.U|:..', '|ME.U.U.U|:.m.', 'ia', 'ia₂', 'šu', 'šu₂', 'u', 'u₂']
111. Evaluating: ['NA:..', 'NA:.t.', 'NI:..', 'NI:.d.', '|ME.U.U.U|:..', '|ME.U.U.U|:.m.', 'ia', 'ia₂', 'šu', 'šu₂', 'u', 'u₂']
112. Evaluating: ['ša', 'ša₂', 'šu', 'šu₂', 'u', 'u₂']
113. Evaluating: ['NA:..', 'NA:.t.', 'ša', 'ša₂', 'šu', 'šu₂', 'u', 'u₂']
114. Evaluating: ['NI:..', 'NI:.d.', 'ša', 'ša₂', 'šu', 'šu₂', 'u', 'u₂']
115. Evaluating: ['NA:..', 'NA:.t.', 'NI:..', 'NI:.d.', 'ša', 'ša₂', 'šu', 'šu₂', 'u', 'u₂']
116. Evaluating: ['|ME.U.U.U|:..', '|ME.U.U.U|:.m.', 'ša', 'ša₂', 'šu', 'šu₂', 'u', 'u₂']
117. Evaluating: ['NA:..', 'NA:.t.', '|ME.U.U.U|:..', '|ME.U.U.U|:.m.', 'ša', 'ša₂', 'šu', 'šu₂', 'u', 'u₂']
118. Evaluating: ['NI:..', 'NI:.d.', '|ME.U.U.U|:..', '|ME.U.U.U|:.m.', 'ša', 'ša₂', 'šu', 'šu₂', 'u', 'u₂']
119. Evaluating: ['NA:..', 'NA:.t.', 'NI:..', 'NI:.d.', '|ME.U.U.U|:..', '|ME.U.U.U|:.m.', 'ša', 'ša₂', 'šu', 'šu₂', 'u', 'u₂']
120. Evaluating: ['ia', 'ia₂', 

178. Evaluating: ['NI:..', 'NI:.d.', 'ša', 'ša₂', 'šu', 'šu₂', 'bēlu[lord]N:bēlīya:EN-ia', 'bēlu[lord]N:bēlīya:be-li₂-ia']
179. Evaluating: ['NA:..', 'NA:.t.', 'NI:..', 'NI:.d.', 'ša', 'ša₂', 'šu', 'šu₂', 'bēlu[lord]N:bēlīya:EN-ia', 'bēlu[lord]N:bēlīya:be-li₂-ia']
180. Evaluating: ['|ME.U.U.U|:..', '|ME.U.U.U|:.m.', 'ša', 'ša₂', 'šu', 'šu₂', 'bēlu[lord]N:bēlīya:EN-ia', 'bēlu[lord]N:bēlīya:be-li₂-ia']
181. Evaluating: ['NA:..', 'NA:.t.', '|ME.U.U.U|:..', '|ME.U.U.U|:.m.', 'ša', 'ša₂', 'šu', 'šu₂', 'bēlu[lord]N:bēlīya:EN-ia', 'bēlu[lord]N:bēlīya:be-li₂-ia']
182. Evaluating: ['NI:..', 'NI:.d.', '|ME.U.U.U|:..', '|ME.U.U.U|:.m.', 'ša', 'ša₂', 'šu', 'šu₂', 'bēlu[lord]N:bēlīya:EN-ia', 'bēlu[lord]N:bēlīya:be-li₂-ia']
183. Evaluating: ['NA:..', 'NA:.t.', 'NI:..', 'NI:.d.', '|ME.U.U.U|:..', '|ME.U.U.U|:.m.', 'ša', 'ša₂', 'šu', 'šu₂', 'bēlu[lord]N:bēlīya:EN-ia', 'bēlu[lord]N:bēlīya:be-li₂-ia']
184. Evaluating: ['ia', 'ia₂', 'ša', 'ša₂', 'šu', 'šu₂', 'bēlu[lord]N:bēlīya:EN-ia', 'bēlu[lord]N:bēlīy

236. Evaluating: ['|ME.U.U.U|:..', '|ME.U.U.U|:.m.', 'ia', 'ia₂', 'šu', 'šu₂', 'u', 'u₂', 'bēlu[lord]N:bēlīya:EN-ia', 'bēlu[lord]N:bēlīya:be-li₂-ia']
237. Evaluating: ['NA:..', 'NA:.t.', '|ME.U.U.U|:..', '|ME.U.U.U|:.m.', 'ia', 'ia₂', 'šu', 'šu₂', 'u', 'u₂', 'bēlu[lord]N:bēlīya:EN-ia', 'bēlu[lord]N:bēlīya:be-li₂-ia']
238. Evaluating: ['NI:..', 'NI:.d.', '|ME.U.U.U|:..', '|ME.U.U.U|:.m.', 'ia', 'ia₂', 'šu', 'šu₂', 'u', 'u₂', 'bēlu[lord]N:bēlīya:EN-ia', 'bēlu[lord]N:bēlīya:be-li₂-ia']
239. Evaluating: ['NA:..', 'NA:.t.', 'NI:..', 'NI:.d.', '|ME.U.U.U|:..', '|ME.U.U.U|:.m.', 'ia', 'ia₂', 'šu', 'šu₂', 'u', 'u₂', 'bēlu[lord]N:bēlīya:EN-ia', 'bēlu[lord]N:bēlīya:be-li₂-ia']
240. Evaluating: ['ša', 'ša₂', 'šu', 'šu₂', 'u', 'u₂', 'bēlu[lord]N:bēlīya:EN-ia', 'bēlu[lord]N:bēlīya:be-li₂-ia']
241. Evaluating: ['NA:..', 'NA:.t.', 'ša', 'ša₂', 'šu', 'šu₂', 'u', 'u₂', 'bēlu[lord]N:bēlīya:EN-ia', 'bēlu[lord]N:bēlīya:be-li₂-ia']
242. Evaluating: ['NI:..', 'NI:.d.', 'ša', 'ša₂', 'šu', 'šu₂', 'u', 'u₂', '

302. Evaluating: ['NI:..', 'NI:.d.', '|ME.U.U.U|:..', '|ME.U.U.U|:.m.', 'ia', 'ia₂', 'šu', 'šu₂', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:la-a']
303. Evaluating: ['NA:..', 'NA:.t.', 'NI:..', 'NI:.d.', '|ME.U.U.U|:..', '|ME.U.U.U|:.m.', 'ia', 'ia₂', 'šu', 'šu₂', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:la-a']
304. Evaluating: ['ša', 'ša₂', 'šu', 'šu₂', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:la-a']
305. Evaluating: ['NA:..', 'NA:.t.', 'ša', 'ša₂', 'šu', 'šu₂', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:la-a']
306. Evaluating: ['NI:..', 'NI:.d.', 'ša', 'ša₂', 'šu', 'šu₂', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:la-a']
307. Evaluating: ['NA:..', 'NA:.t.', 'NI:..', 'NI:.d.', 'ša', 'ša₂', 'šu', 'šu₂', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:la-a']
308. Evaluating: ['|ME.U.U.U|:..', '|ME.U.U.U|:.m.', 'ša', 'ša₂', 'šu', 'šu₂', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:la-a']
309. Evaluating: ['NA:..', 'NA:.t.', '|ME.U.U.U|:..', '|ME.U.U.U|:.m.', 'ša', 'ša₂', 'šu', 'šu₂', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:la-a']
310. Evaluating: ['N

368. Evaluating: ['ša', 'ša₂', 'šu', 'šu₂', 'u', 'u₂', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:la-a']
369. Evaluating: ['NA:..', 'NA:.t.', 'ša', 'ša₂', 'šu', 'šu₂', 'u', 'u₂', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:la-a']
370. Evaluating: ['NI:..', 'NI:.d.', 'ša', 'ša₂', 'šu', 'šu₂', 'u', 'u₂', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:la-a']
371. Evaluating: ['NA:..', 'NA:.t.', 'NI:..', 'NI:.d.', 'ša', 'ša₂', 'šu', 'šu₂', 'u', 'u₂', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:la-a']
372. Evaluating: ['|ME.U.U.U|:..', '|ME.U.U.U|:.m.', 'ša', 'ša₂', 'šu', 'šu₂', 'u', 'u₂', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:la-a']
373. Evaluating: ['NA:..', 'NA:.t.', '|ME.U.U.U|:..', '|ME.U.U.U|:.m.', 'ša', 'ša₂', 'šu', 'šu₂', 'u', 'u₂', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:la-a']
374. Evaluating: ['NI:..', 'NI:.d.', '|ME.U.U.U|:..', '|ME.U.U.U|:.m.', 'ša', 'ša₂', 'šu', 'šu₂', 'u', 'u₂', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:la-a']
375. Evaluating: ['NA:..', 'NA:.t.', 'NI:..', 'NI:.d.', '|ME.U.U.U|:..', '|ME.U.U.U|:.m.', 'ša', 'ša₂'

420. Evaluating: ['|ME.U.U.U|:..', '|ME.U.U.U|:.m.', 'šu', 'šu₂', 'bēlu[lord]N:bēlīya:EN-ia', 'bēlu[lord]N:bēlīya:be-li₂-ia', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:la-a']
421. Evaluating: ['NA:..', 'NA:.t.', '|ME.U.U.U|:..', '|ME.U.U.U|:.m.', 'šu', 'šu₂', 'bēlu[lord]N:bēlīya:EN-ia', 'bēlu[lord]N:bēlīya:be-li₂-ia', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:la-a']
422. Evaluating: ['NI:..', 'NI:.d.', '|ME.U.U.U|:..', '|ME.U.U.U|:.m.', 'šu', 'šu₂', 'bēlu[lord]N:bēlīya:EN-ia', 'bēlu[lord]N:bēlīya:be-li₂-ia', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:la-a']
423. Evaluating: ['NA:..', 'NA:.t.', 'NI:..', 'NI:.d.', '|ME.U.U.U|:..', '|ME.U.U.U|:.m.', 'šu', 'šu₂', 'bēlu[lord]N:bēlīya:EN-ia', 'bēlu[lord]N:bēlīya:be-li₂-ia', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:la-a']
424. Evaluating: ['ia', 'ia₂', 'šu', 'šu₂', 'bēlu[lord]N:bēlīya:EN-ia', 'bēlu[lord]N:bēlīya:be-li₂-ia', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:la-a']
425. Evaluating: ['NA:..', 'NA:.t.', 'ia', 'ia₂', 'šu', 'šu₂', 'bēlu[lord]N:bēlīya:EN-ia', 'bēlu[lord]N:bē

466. Evaluating: ['NI:..', 'NI:.d.', 'ša', 'ša₂', 'u', 'u₂', 'bēlu[lord]N:bēlīya:EN-ia', 'bēlu[lord]N:bēlīya:be-li₂-ia', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:la-a']
467. Evaluating: ['NA:..', 'NA:.t.', 'NI:..', 'NI:.d.', 'ša', 'ša₂', 'u', 'u₂', 'bēlu[lord]N:bēlīya:EN-ia', 'bēlu[lord]N:bēlīya:be-li₂-ia', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:la-a']
468. Evaluating: ['|ME.U.U.U|:..', '|ME.U.U.U|:.m.', 'ša', 'ša₂', 'u', 'u₂', 'bēlu[lord]N:bēlīya:EN-ia', 'bēlu[lord]N:bēlīya:be-li₂-ia', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:la-a']
469. Evaluating: ['NA:..', 'NA:.t.', '|ME.U.U.U|:..', '|ME.U.U.U|:.m.', 'ša', 'ša₂', 'u', 'u₂', 'bēlu[lord]N:bēlīya:EN-ia', 'bēlu[lord]N:bēlīya:be-li₂-ia', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:la-a']
470. Evaluating: ['NI:..', 'NI:.d.', '|ME.U.U.U|:..', '|ME.U.U.U|:.m.', 'ša', 'ša₂', 'u', 'u₂', 'bēlu[lord]N:bēlīya:EN-ia', 'bēlu[lord]N:bēlīya:be-li₂-ia', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:la-a']
471. Evaluating: ['NA:..', 'NA:.t.', 'NI:..', 'NI:.d.', '|ME.U.U.U|:..', '|ME.U.

510. Evaluating: ['NI:..', 'NI:.d.', '|ME.U.U.U|:..', '|ME.U.U.U|:.m.', 'ia', 'ia₂', 'ša', 'ša₂', 'šu', 'šu₂', 'u', 'u₂', 'bēlu[lord]N:bēlīya:EN-ia', 'bēlu[lord]N:bēlīya:be-li₂-ia', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:la-a']
511. Evaluating: ['NA:..', 'NA:.t.', 'NI:..', 'NI:.d.', '|ME.U.U.U|:..', '|ME.U.U.U|:.m.', 'ia', 'ia₂', 'ša', 'ša₂', 'šu', 'šu₂', 'u', 'u₂', 'bēlu[lord]N:bēlīya:EN-ia', 'bēlu[lord]N:bēlīya:be-li₂-ia', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:la-a']
512. Evaluating: ['lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u']
513. Evaluating: ['NA:..', 'NA:.t.', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u']
514. Evaluating: ['NI:..', 'NI:.d.', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u']
515. Evaluating: ['NA:..', 'NA:.t.', 'NI:..', 'NI:.d.', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u']
516. Evaluating: ['|ME.U.U.U|:..', '|ME.U.U.U|:.m.', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u']
517. Evaluating: ['NA:..', 'NA:.t.', '|ME.U.U.U|:..', '|ME.U.U.U|:.m.', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u']
518. Eva

580. Evaluating: ['|ME.U.U.U|:..', '|ME.U.U.U|:.m.', 'u', 'u₂', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u']
581. Evaluating: ['NA:..', 'NA:.t.', '|ME.U.U.U|:..', '|ME.U.U.U|:.m.', 'u', 'u₂', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u']
582. Evaluating: ['NI:..', 'NI:.d.', '|ME.U.U.U|:..', '|ME.U.U.U|:.m.', 'u', 'u₂', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u']
583. Evaluating: ['NA:..', 'NA:.t.', 'NI:..', 'NI:.d.', '|ME.U.U.U|:..', '|ME.U.U.U|:.m.', 'u', 'u₂', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u']
584. Evaluating: ['ia', 'ia₂', 'u', 'u₂', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u']
585. Evaluating: ['NA:..', 'NA:.t.', 'ia', 'ia₂', 'u', 'u₂', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u']
586. Evaluating: ['NI:..', 'NI:.d.', 'ia', 'ia₂', 'u', 'u₂', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u']
587. Evaluating: ['NA:..', 'NA:.t.', 'NI:..', 'NI:.d.', 'ia', 'ia₂', 'u', 'u₂', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u']
588. Evaluating: ['|ME.U.U.U|:..', '|ME.U.U.U|:.m.', 'ia', 'ia₂', 'u', 'u₂', 'lū[may]MOD

644. Evaluating: ['|ME.U.U.U|:..', '|ME.U.U.U|:.m.', 'bēlu[lord]N:bēlīya:EN-ia', 'bēlu[lord]N:bēlīya:be-li₂-ia', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u']
645. Evaluating: ['NA:..', 'NA:.t.', '|ME.U.U.U|:..', '|ME.U.U.U|:.m.', 'bēlu[lord]N:bēlīya:EN-ia', 'bēlu[lord]N:bēlīya:be-li₂-ia', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u']
646. Evaluating: ['NI:..', 'NI:.d.', '|ME.U.U.U|:..', '|ME.U.U.U|:.m.', 'bēlu[lord]N:bēlīya:EN-ia', 'bēlu[lord]N:bēlīya:be-li₂-ia', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u']
647. Evaluating: ['NA:..', 'NA:.t.', 'NI:..', 'NI:.d.', '|ME.U.U.U|:..', '|ME.U.U.U|:.m.', 'bēlu[lord]N:bēlīya:EN-ia', 'bēlu[lord]N:bēlīya:be-li₂-ia', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u']
648. Evaluating: ['ia', 'ia₂', 'bēlu[lord]N:bēlīya:EN-ia', 'bēlu[lord]N:bēlīya:be-li₂-ia', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u']
649. Evaluating: ['NA:..', 'NA:.t.', 'ia', 'ia₂', 'bēlu[lord]N:bēlīya:EN-ia', 'bēlu[lord]N:bēlīya:be-li₂-ia', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u']
650. Evaluating: ['

692. Evaluating: ['|ME.U.U.U|:..', '|ME.U.U.U|:.m.', 'ša', 'ša₂', 'šu', 'šu₂', 'bēlu[lord]N:bēlīya:EN-ia', 'bēlu[lord]N:bēlīya:be-li₂-ia', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u']
693. Evaluating: ['NA:..', 'NA:.t.', '|ME.U.U.U|:..', '|ME.U.U.U|:.m.', 'ša', 'ša₂', 'šu', 'šu₂', 'bēlu[lord]N:bēlīya:EN-ia', 'bēlu[lord]N:bēlīya:be-li₂-ia', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u']
694. Evaluating: ['NI:..', 'NI:.d.', '|ME.U.U.U|:..', '|ME.U.U.U|:.m.', 'ša', 'ša₂', 'šu', 'šu₂', 'bēlu[lord]N:bēlīya:EN-ia', 'bēlu[lord]N:bēlīya:be-li₂-ia', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u']
695. Evaluating: ['NA:..', 'NA:.t.', 'NI:..', 'NI:.d.', '|ME.U.U.U|:..', '|ME.U.U.U|:.m.', 'ša', 'ša₂', 'šu', 'šu₂', 'bēlu[lord]N:bēlīya:EN-ia', 'bēlu[lord]N:bēlīya:be-li₂-ia', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u']
696. Evaluating: ['ia', 'ia₂', 'ša', 'ša₂', 'šu', 'šu₂', 'bēlu[lord]N:bēlīya:EN-ia', 'bēlu[lord]N:bēlīya:be-li₂-ia', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u']
697. Evaluating: ['NA:..', 'NA:.t.', 'ia'

737. Evaluating: ['NA:..', 'NA:.t.', 'šu', 'šu₂', 'u', 'u₂', 'bēlu[lord]N:bēlīya:EN-ia', 'bēlu[lord]N:bēlīya:be-li₂-ia', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u']
738. Evaluating: ['NI:..', 'NI:.d.', 'šu', 'šu₂', 'u', 'u₂', 'bēlu[lord]N:bēlīya:EN-ia', 'bēlu[lord]N:bēlīya:be-li₂-ia', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u']
739. Evaluating: ['NA:..', 'NA:.t.', 'NI:..', 'NI:.d.', 'šu', 'šu₂', 'u', 'u₂', 'bēlu[lord]N:bēlīya:EN-ia', 'bēlu[lord]N:bēlīya:be-li₂-ia', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u']
740. Evaluating: ['|ME.U.U.U|:..', '|ME.U.U.U|:.m.', 'šu', 'šu₂', 'u', 'u₂', 'bēlu[lord]N:bēlīya:EN-ia', 'bēlu[lord]N:bēlīya:be-li₂-ia', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u']
741. Evaluating: ['NA:..', 'NA:.t.', '|ME.U.U.U|:..', '|ME.U.U.U|:.m.', 'šu', 'šu₂', 'u', 'u₂', 'bēlu[lord]N:bēlīya:EN-ia', 'bēlu[lord]N:bēlīya:be-li₂-ia', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u']
742. Evaluating: ['NI:..', 'NI:.d.', '|ME.U.U.U|:..', '|ME.U.U.U|:.m.', 'šu', 'šu₂', 'u', 'u₂', 'bēlu[lord]N:bēlīya

784. Evaluating: ['ša', 'ša₂', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:la-a', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u']
785. Evaluating: ['NA:..', 'NA:.t.', 'ša', 'ša₂', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:la-a', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u']
786. Evaluating: ['NI:..', 'NI:.d.', 'ša', 'ša₂', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:la-a', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u']
787. Evaluating: ['NA:..', 'NA:.t.', 'NI:..', 'NI:.d.', 'ša', 'ša₂', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:la-a', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u']
788. Evaluating: ['|ME.U.U.U|:..', '|ME.U.U.U|:.m.', 'ša', 'ša₂', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:la-a', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u']
789. Evaluating: ['NA:..', 'NA:.t.', '|ME.U.U.U|:..', '|ME.U.U.U|:.m.', 'ša', 'ša₂', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:la-a', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u']
790. Evaluating: ['NI:..', 'NI:.d.', '|ME.U.U.U|:..', '|ME.U.U.U|:.m.', 'ša', 'ša₂', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:la-a', 'lū[may]MOD:lū:lu'

836. Evaluating: ['|ME.U.U.U|:..', '|ME.U.U.U|:.m.', 'u', 'u₂', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:la-a', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u']
837. Evaluating: ['NA:..', 'NA:.t.', '|ME.U.U.U|:..', '|ME.U.U.U|:.m.', 'u', 'u₂', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:la-a', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u']
838. Evaluating: ['NI:..', 'NI:.d.', '|ME.U.U.U|:..', '|ME.U.U.U|:.m.', 'u', 'u₂', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:la-a', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u']
839. Evaluating: ['NA:..', 'NA:.t.', 'NI:..', 'NI:.d.', '|ME.U.U.U|:..', '|ME.U.U.U|:.m.', 'u', 'u₂', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:la-a', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u']
840. Evaluating: ['ia', 'ia₂', 'u', 'u₂', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:la-a', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u']
841. Evaluating: ['NA:..', 'NA:.t.', 'ia', 'ia₂', 'u', 'u₂', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:la-a', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u']
842. Evaluating: ['NI:..', 'NI:.d.', 'ia', 'ia₂', 'u', 'u₂', 

886. Evaluating: ['NI:..', 'NI:.d.', '|ME.U.U.U|:..', '|ME.U.U.U|:.m.', 'ša', 'ša₂', 'šu', 'šu₂', 'u', 'u₂', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:la-a', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u']
887. Evaluating: ['NA:..', 'NA:.t.', 'NI:..', 'NI:.d.', '|ME.U.U.U|:..', '|ME.U.U.U|:.m.', 'ša', 'ša₂', 'šu', 'šu₂', 'u', 'u₂', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:la-a', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u']
888. Evaluating: ['ia', 'ia₂', 'ša', 'ša₂', 'šu', 'šu₂', 'u', 'u₂', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:la-a', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u']
889. Evaluating: ['NA:..', 'NA:.t.', 'ia', 'ia₂', 'ša', 'ša₂', 'šu', 'šu₂', 'u', 'u₂', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:la-a', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u']
890. Evaluating: ['NI:..', 'NI:.d.', 'ia', 'ia₂', 'ša', 'ša₂', 'šu', 'šu₂', 'u', 'u₂', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:la-a', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u']
891. Evaluating: ['NA:..', 'NA:.t.', 'NI:..', 'NI:.d.', 'ia', 'ia₂', 'ša', 'ša₂', 'šu', 'šu₂', 'u', '

927. Evaluating: ['NA:..', 'NA:.t.', 'NI:..', 'NI:.d.', '|ME.U.U.U|:..', '|ME.U.U.U|:.m.', 'ia', 'ia₂', 'ša', 'ša₂', 'bēlu[lord]N:bēlīya:EN-ia', 'bēlu[lord]N:bēlīya:be-li₂-ia', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:la-a', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u']
928. Evaluating: ['šu', 'šu₂', 'bēlu[lord]N:bēlīya:EN-ia', 'bēlu[lord]N:bēlīya:be-li₂-ia', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:la-a', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u']
929. Evaluating: ['NA:..', 'NA:.t.', 'šu', 'šu₂', 'bēlu[lord]N:bēlīya:EN-ia', 'bēlu[lord]N:bēlīya:be-li₂-ia', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:la-a', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u']
930. Evaluating: ['NI:..', 'NI:.d.', 'šu', 'šu₂', 'bēlu[lord]N:bēlīya:EN-ia', 'bēlu[lord]N:bēlīya:be-li₂-ia', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:la-a', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u']
931. Evaluating: ['NA:..', 'NA:.t.', 'NI:..', 'NI:.d.', 'šu', 'šu₂', 'bēlu[lord]N:bēlīya:EN-ia', 'bēlu[lord]N:bēlīya:be-li₂-ia', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:la-a', 'lū[m

964. Evaluating: ['|ME.U.U.U|:..', '|ME.U.U.U|:.m.', 'u', 'u₂', 'bēlu[lord]N:bēlīya:EN-ia', 'bēlu[lord]N:bēlīya:be-li₂-ia', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:la-a', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u']
965. Evaluating: ['NA:..', 'NA:.t.', '|ME.U.U.U|:..', '|ME.U.U.U|:.m.', 'u', 'u₂', 'bēlu[lord]N:bēlīya:EN-ia', 'bēlu[lord]N:bēlīya:be-li₂-ia', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:la-a', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u']
966. Evaluating: ['NI:..', 'NI:.d.', '|ME.U.U.U|:..', '|ME.U.U.U|:.m.', 'u', 'u₂', 'bēlu[lord]N:bēlīya:EN-ia', 'bēlu[lord]N:bēlīya:be-li₂-ia', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:la-a', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u']
967. Evaluating: ['NA:..', 'NA:.t.', 'NI:..', 'NI:.d.', '|ME.U.U.U|:..', '|ME.U.U.U|:.m.', 'u', 'u₂', 'bēlu[lord]N:bēlīya:EN-ia', 'bēlu[lord]N:bēlīya:be-li₂-ia', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:la-a', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u']
968. Evaluating: ['ia', 'ia₂', 'u', 'u₂', 'bēlu[lord]N:bēlīya:EN-ia', 'bēlu[lord]N:bēlīya:be

1001. Evaluating: ['NA:..', 'NA:.t.', 'ia', 'ia₂', 'šu', 'šu₂', 'u', 'u₂', 'bēlu[lord]N:bēlīya:EN-ia', 'bēlu[lord]N:bēlīya:be-li₂-ia', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:la-a', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u']
1002. Evaluating: ['NI:..', 'NI:.d.', 'ia', 'ia₂', 'šu', 'šu₂', 'u', 'u₂', 'bēlu[lord]N:bēlīya:EN-ia', 'bēlu[lord]N:bēlīya:be-li₂-ia', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:la-a', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u']
1003. Evaluating: ['NA:..', 'NA:.t.', 'NI:..', 'NI:.d.', 'ia', 'ia₂', 'šu', 'šu₂', 'u', 'u₂', 'bēlu[lord]N:bēlīya:EN-ia', 'bēlu[lord]N:bēlīya:be-li₂-ia', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:la-a', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u']
1004. Evaluating: ['|ME.U.U.U|:..', '|ME.U.U.U|:.m.', 'ia', 'ia₂', 'šu', 'šu₂', 'u', 'u₂', 'bēlu[lord]N:bēlīya:EN-ia', 'bēlu[lord]N:bēlīya:be-li₂-ia', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:la-a', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u']
1005. Evaluating: ['NA:..', 'NA:.t.', '|ME.U.U.U|:..', '|ME.U.U.U|:.m.', 'ia', 'ia₂', 'šu

In [None]:
df_evals_all = pd.concat([df_evals_sender_ortho_word,
                          df_evals_sender_ortho_sign,
                         df_evals_sender_paleo,
                          df_evals_dossier_ortho_word,
                          df_evals_dossier_ortho_sign,
                          df_evals_dossier_paleo,
                         df_evals_saachap_ortho_word,
                         df_evals_saachap_ortho_sign,
                         df_evals_saachap_paleo])
df_evals_all.to_csv('output/evaluation_all.csv',encoding='utf-8')

---LAS MATERIAL---

In [117]:
saa8_signs,saa8_words = parse_corpus('saa08')
saa10_signs,saa10_words = parse_corpus('saa10')

done
done


In [120]:
df_las_signs = pd.concat([pd.DataFrame(saa8_signs),pd.DataFrame(saa10_signs)])
df_las_signs = las_signs.fillna('')
df_las_signs

Unnamed: 0,a,b,break,cf,epos,f,file,form,frag,gw,line_label,m,norm,pos,ref,sense,sign_loc_id,text_id,word_num,word_sign_tot
0,,1(diš),,no-cf,no-epos,,P236880.json,1,1,no-gw,o 1,,no-norm,n,P236880.2.1,no-sense,P236880.2.1.0,P236880,1,1
1,,3(u),,Sin,DN,,P236880.json,30,30,1,o 1,,Sin,DN,P236880.2.2,1,P236880.2.2.0,P236880,2,1
2,,ina,,ina,PRP,,P236880.json,ina,ina,in,o 1,,ina,PRP,P236880.2.3,in,P236880.2.3.0,P236880,3,1
3,,ITI,,Addaru,MN,,P236880.json,{ITI}ŠE,{iti}ŠE,Adar,o 1,,Addari,MN,P236880.2.4,Adar,P236880.2.4.0,P236880,4,1
4,,ŠE,,Addaru,MN,,P236880.json,{ITI}ŠE,{iti}ŠE,Adar,o 1,,Addari,MN,P236880.2.4,Adar,P236880.2.4.1,P236880,4,1
5,,UD,,ūmu,N,,P236880.json,UD,UD,day,o 1,,ūm,N,P236880.2.5,day,P236880.2.5.0,P236880,5,1
6,,1(u) 4(diš),,no-cf,no-epos,,P236880.json,14-KAM₂,14-KAM₂,no-gw,o 1,,no-norm,n,P236880.2.6,no-sense,P236880.2.6.0,P236880,6,2
7,,KAM₂,,no-cf,no-epos,,P236880.json,14-KAM₂,14-KAM₂,no-gw,o 1,,no-norm,n,P236880.2.6,no-sense,P236880.2.6.1,P236880,6,2
8,,lu,,lū,CNJ,,P236880.json,lu,lu,or,o 1,,lū,CNJ,P236880.2.7,or,P236880.2.7.0,P236880,7,1
9,,UD,,ūmu,N,,P236880.json,UD,UD,day,o 1,,ūm,N,P236880.2.8,day,P236880.2.8.0,P236880,8,1


In [122]:
file_names = df_las_signs['file'].unique()
df_las_signs['sign_form'] = df_las_signs['b'].apply(lambda x: sign_index.get(x.lower(),'?'))

import re
def get_num_part(s):
    try:
        n = re.findall(r'[₀₁₂₃₄₅₆₇₈₉]+',s)[0]
        n = n.replace('₀','0').replace('₁','1').replace('₂','2').replace('₃','3').replace('₄','4')
        n = n.replace('₅','5').replace('₆','6').replace('₇','7').replace('₈','8').replace('₉','9')
    except:
        n = 1
    return n

def get_str_part(s):
    try:
        n = re.findall(r'[a-zA-ZšŠṣṢṭṬʾ \(\)0-9]+',s)[0]
    except:
        n = s
    return n
        
df_las_signs['str_part'] = df_las_signs['b'].apply(lambda x: get_str_part(x))
df_las_signs['num_part'] = df_las_signs['b'].apply(lambda x: get_num_part(x))
#df['combined'] = df['sign_form'] + ':' + df['mods_str']
df_las_signs

Unnamed: 0,a,b,break,cf,epos,f,file,form,frag,gw,...,pos,ref,sense,sign_loc_id,text_id,word_num,word_sign_tot,sign_form,str_part,num_part
0,,1(diš),,no-cf,no-epos,,P236880.json,1,1,no-gw,...,n,P236880.2.1,no-sense,P236880.2.1.0,P236880,1,1,DIŠ,1(diš),1
1,,3(u),,Sin,DN,,P236880.json,30,30,1,...,DN,P236880.2.2,1,P236880.2.2.0,P236880,2,1,|U.U.U|,3(u),1
2,,ina,,ina,PRP,,P236880.json,ina,ina,in,...,PRP,P236880.2.3,in,P236880.2.3.0,P236880,3,1,AŠ,ina,1
3,,ITI,,Addaru,MN,,P236880.json,{ITI}ŠE,{iti}ŠE,Adar,...,MN,P236880.2.4,Adar,P236880.2.4.0,P236880,4,1,|UD×(U.U.U)|,ITI,1
4,,ŠE,,Addaru,MN,,P236880.json,{ITI}ŠE,{iti}ŠE,Adar,...,MN,P236880.2.4,Adar,P236880.2.4.1,P236880,4,1,ŠE,ŠE,1
5,,UD,,ūmu,N,,P236880.json,UD,UD,day,...,N,P236880.2.5,day,P236880.2.5.0,P236880,5,1,UD,UD,1
6,,1(u) 4(diš),,no-cf,no-epos,,P236880.json,14-KAM₂,14-KAM₂,no-gw,...,n,P236880.2.6,no-sense,P236880.2.6.0,P236880,6,2,?,1(u) 4(diš),1
7,,KAM₂,,no-cf,no-epos,,P236880.json,14-KAM₂,14-KAM₂,no-gw,...,n,P236880.2.6,no-sense,P236880.2.6.1,P236880,6,2,KAM₂,KAM,2
8,,lu,,lū,CNJ,,P236880.json,lu,lu,or,...,CNJ,P236880.2.7,or,P236880.2.7.0,P236880,7,1,LU,lu,1
9,,UD,,ūmu,N,,P236880.json,UD,UD,day,...,N,P236880.2.8,day,P236880.2.8.0,P236880,8,1,UD,UD,1


In [123]:
df2 = pd.DataFrame(df_las_signs.groupby(['str_part'])['num_part'].agg('nunique'))
list_ortho_syls = list(df2[df2[('num_part')] > 1].index)

list_ortho_syls = [h for h in list_ortho_syls if len(re.findall(r'[A-Z]',h)) == 0]
list_ortho_syls

df_las_ortho_signs = df_las_signs[df_las_signs['str_part'].isin(list_ortho_syls)]
df_las_ortho_signs

Unnamed: 0,a,b,break,cf,epos,f,file,form,frag,gw,...,pos,ref,sense,sign_loc_id,text_id,word_num,word_sign_tot,sign_form,str_part,num_part
8,,lu,,lū,CNJ,,P236880.json,lu,lu,or,...,CNJ,P236880.2.7,or,P236880.2.7.0,P236880,7,1,LU,lu,1
26,,la,,lā,MOD,,P236880.json,la,la,not,...,MOD,P236880.4.4,not,P236880.4.4.0,P236880,19,1,LA,la,1
30,,šu₂,,simānu,N,,P236880.json,si-ma-ni-šu₂,si-ma-ni-šu₂,(right) occasion,...,N,P236880.4.5,time,P236880.4.5.3,P236880,20,4,ŠU₂,šu,2
38,,e,,tebû,N,,P236880.json,ZI-e,ZI-e,arise,...,V,P236880.5.1,attack,P236880.5.1.1,P236880,24,2,E,e,1
41,,tu₂,,kiššūtu,N,,P236880.json,ŠU₂-tu₂,ŠU₂-tu₂,exercise of power,...,N,P236880.5.3,exercise of power,P236880.5.3.1,P236880,26,2,UD,tu,2
44,,ša₂,,ša,DET,,P236880.json,ša₂,ša₂,of,...,DET,P236880.5.6,of,P236880.5.6.0,P236880,28,1,GAR,ša,2
53,,u,,u,CNJ,,P236880.json,u,u,and,...,CNJ,P236880.7.5,and,P236880.7.5.0,P236880,35,1,U,u,1
69,,ša₂,,ša,DET,,P236880.json,ša₂,ša₂,of,...,DET,P236880.9.1,of,P236880.9.1.0,P236880,45,1,GAR,ša,2
73,,pi,,ṭuppu,N,,P236880.json,DUB-pi,DUB-pi,tablet,...,N,P236880.9.3,tablet,P236880.9.3.1,P236880,47,2,PI,pi,1
88,,nu,missing,Ṣalbatanu,CN,,P236880.json,{MUL}ṣal-bat-a-nu,{mul}[ṣal-bat-a-nu,Mars,...,CN,P236880.11.5,Mars,P236880.11.5.4,P236880,57,5,NU,nu,1


In [124]:
df_las_ortho_str = pd.DataFrame(df_las_ortho_signs.groupby(['text_id']).apply(lambda x: ' '.join(x['b'])))
df_las_ortho_str.columns = ['ortho_str']
df_las_ortho_str

cv = CountVectorizer(token_pattern='[^ ]+',lowercase=False)
ft = cv.fit_transform(list(df_las_ortho_str['ortho_str']))
tm_las_ortho_sign = pd.DataFrame(ft.toarray(),columns=cv.get_feature_names(),index=df_las_ortho_str.index)
tm_las_ortho_sign

Unnamed: 0_level_0,ad,ad₂,ap,ap₂,ar,ar₂,as,as₂,aš,aš₂,...,ʾa,ʾa₃,ṣu,ṣu₂,ṭah,ṭah₂,ṭe,ṭe₃,ṭu,ṭu₂
text_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
P236880,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
P236907,2,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
P236915,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
P236916,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
P236917,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
P236918,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
P236919,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
P236920,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
P236921,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
P236922,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [264]:
df_las_words = pd.concat([pd.DataFrame(saa8_words),pd.DataFrame(saa10_words)])
df_las_words = df_las_words[(df_las_words['cf'] != 'no-cf')]

df_las_words['lemma'] = df_las_words['cf'] + '[' + df_las_words['gw'].apply(lambda x: x.replace(' ','-')) + ']' + df_las_words['pos']
df_las_words['lemma_norm'] = df_las_words['lemma'] + ':' + df_las_words['norm']
df_las_words['lemma_norm_form'] = df_las_words['lemma_norm'] + ':' + df_las_words['form']

df_norm_uniq = pd.DataFrame(df_las_words.groupby('lemma_norm')['form'].nunique())
list_ortho_words = list(df_norm_uniq[df_norm_uniq[('form')] > 1].index)

df_form_50 = pd.DataFrame(df_las_words.groupby('lemma_norm_form')['form'].agg('count'))
list_form_50 = list(df_form_50[df_form_50[('form')] > 50].index)

df_las_ortho_words = df_las_words[(df_las_words['lemma_norm'].isin(list_ortho_words)) & df_las_words['lemma_norm_form'].isin(list_form_50)]
df_las_ortho_words#[df_las_ortho_words['cf'] == 'ammar']

Unnamed: 0,cf,epos,file,form,frag,gw,line_label,norm,pos,ref,sense,text_id,word_num,word_sign_tot,lemma,lemma_norm,lemma_norm_form
1,Sin,DN,P236880.json,30,30,1,o 1,Sin,DN,P236880.2.2,1,P236880,2,1,Sin[1]DN,Sin[1]DN:Sin,Sin[1]DN:Sin:30
2,ina,PRP,P236880.json,ina,ina,in,o 1,ina,PRP,P236880.2.3,in,P236880,3,1,ina[in]PRP,ina[in]PRP:ina,ina[in]PRP:ina:ina
4,ūmu,N,P236880.json,UD,UD,day,o 1,ūm,N,P236880.2.5,day,P236880,5,1,ūmu[day]N,ūmu[day]N:ūm,ūmu[day]N:ūm:UD
7,ūmu,N,P236880.json,UD,UD,day,o 1,ūm,N,P236880.2.8,day,P236880,8,1,ūmu[day]N,ūmu[day]N:ūm,ūmu[day]N:ūm:UD
9,itti,PRP,P236880.json,KI,KI,with,o 1,itti,PRP,P236880.2.10,with,P236880,10,1,itti[with]PRP,itti[with]PRP:itti,itti[with]PRP:itti:KI
10,Šamaš,DN,P236880.json,20,20,1,o 1,Šamaš,DN,P236880.2.11,1,P236880,11,1,Šamaš[1]DN,Šamaš[1]DN:Šamaš,Šamaš[1]DN:Šamaš:20
12,amāru,V,P236880.json,IGI,IGI,see,o 1,innamir,V,P236880.2.13,be(come) seen,P236880,13,1,amāru[see]V,amāru[see]V:innamir,amāru[see]V:innamir:IGI
16,Sin,DN,P236880.json,30,30,1,o 3,Sin,DN,P236880.4.2,1,P236880,17,1,Sin[1]DN,Sin[1]DN:Sin,Sin[1]DN:Sin:30
17,ina,PRP,P236880.json,ina,ina,in,o 3,ina,PRP,P236880.4.3,in,P236880,18,1,ina[in]PRP,ina[in]PRP:ina,ina[in]PRP:ina:ina
18,lā,MOD,P236880.json,la,la,not,o 3,lā,MOD,P236880.4.4,not,P236880,19,1,lā[not]MOD,lā[not]MOD:lā,lā[not]MOD:lā:la


In [265]:
df_las_ortho_wordstr = pd.DataFrame(df_las_ortho_words.groupby(['text_id']).apply(lambda x: ' '.join(x['lemma_norm_form'])))
df_las_ortho_wordstr.columns = ['ortho_wordstr']
df_las_ortho_wordstr

cv = CountVectorizer(token_pattern='[^ ]+',lowercase=False)
ft = cv.fit_transform(list(df_las_ortho_wordstr['ortho_wordstr']))
tm_las_ortho_word = pd.DataFrame(ft.toarray(),columns=cv.get_feature_names(),index=df_las_ortho_wordstr.index)
#tm_las_ortho_word.to_csv('output/tm_ortho_word.csv',encoding='utf-8',sep='\t')
tm_las_ortho_word.columns

Index(['Bel[1]DN:Bel:{d}EN', 'Dilbat[Venus]CN:Dilbat:{MUL}dil-bat',
       'Marduk[1]DN:Marduk:{d}AMAR.UTU',
       'Mat-Akkadi[Babylonia]GN:Mat-Akkadi:KUR-URI{KI}',
       'Mat-Akkadi[Babylonia]GN:Mat-Akkadi:URI{KI}',
       'Mat-Amurri[Westland]GN:Mat-Amurri:KUR-MAR.TU{KI}',
       'Nabu[1]DN:Nabu:{d}AG', 'Nabu[1]DN:Nabu:{d}PA',
       'Sagmegar[Jupiter]CN:Sagmegar:{MUL}SAG.ME.GAR', 'Sin[1]DN:Sin:30',
       'Sin[1]DN:Sin:{d}30', 'abu[father]N:abīšu:AD-šu₂',
       'adi[until]PRP:adi:a-di', 'amāru[see]V:innamir:IGI',
       'amāru[see]V:innamir:IGI-ir', 'amāru[see]V:innamrū:IGI-MEŠ',
       'ana[to]PRP:ana:a-na', 'ana[to]PRP:ana:ana',
       'antalû[eclipse]N:attalû:AN.MI', 'anāku[I]IP:anāku:a-na-ku',
       'arku[long]AJ:arkūti:GID₂.DA-MEŠ', 'bašû[exist]V:ibašši:GAL₂-ši',
       'bašû[exist]V:ibašši:i-ba-aš₂-ši', 'bēlu[lord]N:bēlī:be-li',
       'bēlu[lord]N:bēlī:be-li₂', 'bēlu[lord]N:bēlīni:EN-ni',
       'bēlu[lord]N:bēlīya:EN-ia', 'bēlu[lord]N:bēlīya:EN-ia₂',
       'bēlu[lord]N:

In [267]:
tm_las_counts = pd.concat([tm_las_ortho_sign,tm_las_ortho_word],axis=1)
tm_las_counts

Unnamed: 0,ad,ad₂,ap,ap₂,ar,ar₂,as,as₂,aš,aš₂,...,šumma[if]MOD:šumma:šum-ma,šū[he]IP:šû:šu-u,šū[he]IP:šû:šu-u₂,ūmu[day]N:ūm:UD,ūmu[day]N:ūmu:UD-mu,ūmu[day]N:ūmē:UD-MEŠ,Ṣalbatanu[Mars]CN:Ṣalbatanu:{MUL}ṣal-bat-a-nu,ṣēru[back]N:ṣēri:EDIN,ṭiābu[be(come)-good]V:iṭâb:DUG₃-ab,ṭūbu[goodness]N:ṭūb:ṭu-ub
P236880,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,3.0,0.0,0.0,2.0,0.0,0.0,0.0
P236907,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
P236915,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
P236916,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,2.0,1.0,0.0,0.0,0.0,0.0,0.0
P236917,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0
P236918,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0
P236919,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0
P236920,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,2.0,0.0,1.0,0.0,0.0,2.0,0.0
P236921,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0
P236922,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0


In [128]:
df_las_ortho_sign_count = pd.DataFrame(df_las_ortho_signs.groupby(['str_part','b'])['cf'].agg('count'))
df_las_ortho_sign_count = df_las_ortho_sign_count[df_las_ortho_sign_count[('cf')] > 100]

df_las_ortho_sign_uniq = df_las_ortho_sign_count.groupby([('str_part')]).nunique()

df_las_ortho_sign_count = df_las_ortho_sign_count.loc[list(df_las_ortho_sign_uniq[df_las_ortho_sign_uniq[('cf')] > 1].index)]
df_las_ortho_sign_count

Unnamed: 0_level_0,Unnamed: 1_level_0,cf
str_part,b,Unnamed: 2_level_1
aš,aš,152
aš,aš₂,433
ia,ia,1169
ia,ia₂,471
li,li,629
li,li₂,889
tu,tu,602
tu,tu₂,363
u,u,1955
u,u₂,1755


In [129]:
df_las_ortho_word_count = pd.DataFrame(df_las_words.groupby(['lemma_norm','lemma_norm_form'])['cf'].agg('count'))
df_las_ortho_word_count = df_las_ortho_word_count[df_las_ortho_word_count[('cf')] > 50]

df_las_ortho_word_uniq = df_las_ortho_word_count.groupby([('lemma_norm')]).nunique()

df_las_ortho_word_count = df_las_ortho_word_count.loc[list(df_las_ortho_word_uniq[df_las_ortho_word_uniq[('cf')] > 1].index)]
df_las_ortho_word_count

Unnamed: 0_level_0,Unnamed: 1_level_0,cf
lemma_norm,lemma_norm_form,Unnamed: 2_level_1
Mat-Akkadi[Babylonia]GN:Mat-Akkadi,Mat-Akkadi[Babylonia]GN:Mat-Akkadi:KUR-URI{KI},178
Mat-Akkadi[Babylonia]GN:Mat-Akkadi,Mat-Akkadi[Babylonia]GN:Mat-Akkadi:URI{KI},60
Nabu[1]DN:Nabu,Nabu[1]DN:Nabu:{d}AG,208
Nabu[1]DN:Nabu,Nabu[1]DN:Nabu:{d}PA,168
Sin[1]DN:Sin,Sin[1]DN:Sin:30,744
Sin[1]DN:Sin,Sin[1]DN:Sin:{d}30,331
amāru[see]V:innamir,amāru[see]V:innamir:IGI,95
amāru[see]V:innamir,amāru[see]V:innamir:IGI-ir,86
ana[to]PRP:ana,ana[to]PRP:ana:a-na,1905
ana[to]PRP:ana,ana[to]PRP:ana:ana,376


In [269]:
#Orthography - Sign
map_ortho_sign = {'aš':['aš','aš₂'],
                  'ia':['ia','ia₂'],
                  'li':['li','li₂'],
                  'ša':['ša','ša₂'],
                  'šu':['šu','šu₂'],
                  'tu':['tu','tu₂'],
                  'u':['u','u₂']
                 }
list_ortho_sign = []
list_ortho_sign_pairs = []
for v in map_ortho_sign.values():
    list_ortho_sign = list_ortho_sign + v
    list_ortho_sign_pairs.append(v)
list_ortho_sign

#Orthography - Word
map_ortho_word = {'Sin[1]DN:Sin':['Sin[1]DN:Sin:30','Sin[1]DN:Sin:{d}30'],
                  'ana[to]PRP:ana':['ana[to]PRP:ana:a-na','ana[to]PRP:ana:ana'],
                  'libbu[interior]N:libbi':['libbu[interior]N:libbi:ŠA₃','libbu[interior]N:libbi:ŠA₃-bi'],
                  'lā[not]MOD:lā':['lā[not]MOD:lā:la','lā[not]MOD:lā:NU'],
                  'lū[may]MOD:lū':['lū[may]MOD:lū:lu','lū[may]MOD:lū:lu-u'],
                  #'mā[saying]PRP:mā':['mā[saying]PRP:mā:ma','mā[saying]PRP:mā:ma-a'],
                  'muhhu[skull]N:muhhi':['muhhu[skull]N:muhhi:UGU','muhhu[skull]N:muhhi:UGU-hi'],
                  'u[and]CNJ:u':['u[and]CNJ:u:u','u[and]CNJ:u:u₃'],
                  'ša[of]DET:ša':['ša[of]DET:ša:ša','ša[of]DET:ša:ša₂'],
                  'šulmu[completeness]N:šulmu':['šulmu[completeness]N:šulmu:DI-mu','šulmu[completeness]N:šulmu:šul-mu'],
                'šarru[king]N:šarru':['šarru[king]N:šarru:LUGAL','šarru[king]N:šarru:MAN']
                }
list_ortho_word = []
list_ortho_word_pairs = []
for v in map_ortho_word.values():
    list_ortho_word = list_ortho_word + v
    list_ortho_word_pairs.append(v)
list_ortho_word_pairs
list_features_all = list_ortho_sign + list_ortho_word
list_pairs_all = list_ortho_sign_pairs + list_ortho_word_pairs
list_features_all

['aš',
 'aš₂',
 'ia',
 'ia₂',
 'li',
 'li₂',
 'ša',
 'ša₂',
 'šu',
 'šu₂',
 'tu',
 'tu₂',
 'u',
 'u₂',
 'Sin[1]DN:Sin:30',
 'Sin[1]DN:Sin:{d}30',
 'ana[to]PRP:ana:a-na',
 'ana[to]PRP:ana:ana',
 'libbu[interior]N:libbi:ŠA₃',
 'libbu[interior]N:libbi:ŠA₃-bi',
 'lā[not]MOD:lā:la',
 'lā[not]MOD:lā:NU',
 'lū[may]MOD:lū:lu',
 'lū[may]MOD:lū:lu-u',
 'muhhu[skull]N:muhhi:UGU',
 'muhhu[skull]N:muhhi:UGU-hi',
 'u[and]CNJ:u:u',
 'u[and]CNJ:u:u₃',
 'ša[of]DET:ša:ša',
 'ša[of]DET:ša:ša₂',
 'šulmu[completeness]N:šulmu:DI-mu',
 'šulmu[completeness]N:šulmu:šul-mu',
 'šarru[king]N:šarru:LUGAL',
 'šarru[king]N:šarru:MAN']

In [292]:
map_all = map_ortho_sign.copy()
map_all.update(map_ortho_word)
map_all

{'Sin[1]DN:Sin': ['Sin[1]DN:Sin:30', 'Sin[1]DN:Sin:{d}30'],
 'ana[to]PRP:ana': ['ana[to]PRP:ana:a-na', 'ana[to]PRP:ana:ana'],
 'aš': ['aš', 'aš₂'],
 'ia': ['ia', 'ia₂'],
 'li': ['li', 'li₂'],
 'libbu[interior]N:libbi': ['libbu[interior]N:libbi:ŠA₃',
  'libbu[interior]N:libbi:ŠA₃-bi'],
 'lā[not]MOD:lā': ['lā[not]MOD:lā:la', 'lā[not]MOD:lā:NU'],
 'lū[may]MOD:lū': ['lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u'],
 'muhhu[skull]N:muhhi': ['muhhu[skull]N:muhhi:UGU',
  'muhhu[skull]N:muhhi:UGU-hi'],
 'tu': ['tu', 'tu₂'],
 'u': ['u', 'u₂'],
 'u[and]CNJ:u': ['u[and]CNJ:u:u', 'u[and]CNJ:u:u₃'],
 'ša': ['ša', 'ša₂'],
 'ša[of]DET:ša': ['ša[of]DET:ša:ša', 'ša[of]DET:ša:ša₂'],
 'šarru[king]N:šarru': ['šarru[king]N:šarru:LUGAL', 'šarru[king]N:šarru:MAN'],
 'šu': ['šu', 'šu₂'],
 'šulmu[completeness]N:šulmu': ['šulmu[completeness]N:šulmu:DI-mu',
  'šulmu[completeness]N:šulmu:šul-mu']}

In [294]:
df_form_count = pd.DataFrame(df_las_words[df_las_words['lemma_norm_form'].isin(list_ortho_word)].
                             groupby(['lemma_norm','lemma_norm_form'])['cf'].agg('count'))
df_form_count

dict_form_count = {}
for ln in map_ortho_word:
    count1 = float(df_form_count.loc[(ln,map_ortho_word[ln][0])]['cf'])
    count2 = float(df_form_count.loc[(ln,map_ortho_word[ln][1])]['cf'])
    tot12 = count1 + count2
    dict_form_count[map_ortho_word[ln][0]] = count1 / tot12
    dict_form_count[map_ortho_word[ln][1]] = count2 / tot12
dict_form_count

df_sign_count = pd.DataFrame(df_las_signs[df_las_signs['b'].isin(list_ortho_sign)].groupby(['str_part','b'])['cf'].agg('count'))
df_sign_count

dict_sign_count = {}
for ln in map_ortho_sign:
    count1 = float(df_sign_count.loc[(ln,map_ortho_sign[ln][0])]['cf'])
    count2 = float(df_sign_count.loc[(ln,map_ortho_sign[ln][1])]['cf'])
    tot12 = count1 + count2
    dict_sign_count[map_ortho_sign[ln][0]] = count1 / tot12
    dict_sign_count[map_ortho_sign[ln][1]] = count2 / tot12
dict_sign_count

{'aš': 0.25982905982905985,
 'aš₂': 0.7401709401709402,
 'ia': 0.7128048780487805,
 'ia₂': 0.2871951219512195,
 'li': 0.41436100131752307,
 'li₂': 0.5856389986824769,
 'tu': 0.6238341968911917,
 'tu₂': 0.3761658031088083,
 'u': 0.5269541778975741,
 'u₂': 0.47304582210242585,
 'ša': 0.4757853403141361,
 'ša₂': 0.5242146596858639,
 'šu': 0.2829968944099379,
 'šu₂': 0.7170031055900621}

In [295]:
dict_count_all = dict_sign_count.copy()
dict_count_all.update(dict_form_count)
dict_count_all

{'Sin[1]DN:Sin:30': 0.6920930232558139,
 'Sin[1]DN:Sin:{d}30': 0.30790697674418605,
 'ana[to]PRP:ana:a-na': 0.8351600175361683,
 'ana[to]PRP:ana:ana': 0.16483998246383166,
 'aš': 0.25982905982905985,
 'aš₂': 0.7401709401709402,
 'ia': 0.7128048780487805,
 'ia₂': 0.2871951219512195,
 'li': 0.41436100131752307,
 'libbu[interior]N:libbi:ŠA₃': 0.5313935681470138,
 'libbu[interior]N:libbi:ŠA₃-bi': 0.4686064318529862,
 'li₂': 0.5856389986824769,
 'lā[not]MOD:lā:NU': 0.14192495921696574,
 'lā[not]MOD:lā:la': 0.8580750407830342,
 'lū[may]MOD:lū:lu': 0.48253275109170307,
 'lū[may]MOD:lū:lu-u': 0.517467248908297,
 'muhhu[skull]N:muhhi:UGU': 0.8329238329238329,
 'muhhu[skull]N:muhhi:UGU-hi': 0.16707616707616707,
 'tu': 0.6238341968911917,
 'tu₂': 0.3761658031088083,
 'u': 0.5269541778975741,
 'u[and]CNJ:u:u': 0.6188898094449047,
 'u[and]CNJ:u:u₃': 0.38111019055509526,
 'u₂': 0.47304582210242585,
 'ša': 0.4757853403141361,
 'ša[of]DET:ša:ša': 0.5437537718768859,
 'ša[of]DET:ša:ša₂': 0.456246228123

In [297]:
tm_dist_all = form_dist_matrix(tm_las_counts,map_all)
tm_dist_all

Unnamed: 0,Sin[1]DN:Sin:30,Sin[1]DN:Sin:{d}30,ana[to]PRP:ana:a-na,ana[to]PRP:ana:ana,aš,aš₂,ia,ia₂,li,libbu[interior]N:libbi:ŠA₃,...,ša,ša[of]DET:ša:ša,ša[of]DET:ša:ša₂,šarru[king]N:šarru:LUGAL,šarru[king]N:šarru:MAN,ša₂,šu,šulmu[completeness]N:šulmu:DI-mu,šulmu[completeness]N:šulmu:šul-mu,šu₂
P236880,1.000000,0.000000,1.000000,0.000000,,,,,,0.000000,...,0.000000,0.000000,1.000000,1.0,0.0,1.000000,0.000000,,,1.000000
P236907,,,0.333333,0.666667,0.000000,1.000000,0.250000,0.750000,1.000000,1.000000,...,0.000000,0.000000,1.000000,1.0,0.0,1.000000,0.000000,,,1.000000
P236915,1.000000,0.000000,,,,,,,,,...,0.000000,0.000000,1.000000,1.0,0.0,1.000000,0.000000,,,1.000000
P236916,,,1.000000,0.000000,,,1.000000,0.000000,0.000000,,...,0.000000,0.000000,1.000000,,,1.000000,0.000000,,,1.000000
P236917,1.000000,0.000000,0.000000,1.000000,,,,,,1.000000,...,0.000000,0.000000,1.000000,1.0,0.0,1.000000,0.000000,,,1.000000
P236918,1.000000,0.000000,1.000000,0.000000,,,,,,,...,0.250000,0.333333,0.666667,,,0.750000,0.500000,,,0.500000
P236919,0.666667,0.333333,,,,,,,,,...,0.000000,0.000000,1.000000,,,1.000000,,,,
P236920,1.000000,0.000000,0.500000,0.500000,,,1.000000,0.000000,0.000000,1.000000,...,0.000000,0.000000,1.000000,1.0,0.0,1.000000,0.125000,,,0.875000
P236921,0.800000,0.200000,,,,,,,,1.000000,...,,,,1.0,0.0,,0.000000,,,1.000000
P236922,1.000000,0.000000,,,,,,,,,...,1.000000,1.000000,0.000000,,,0.000000,,,,


In [298]:
for lnf in dict_count_all:
    #print(dict_form_count[lnf])
    tm_dist_all[lnf] = tm_dist_all[lnf].fillna(dict_count_all[lnf])
tm_dist_all

Unnamed: 0,Sin[1]DN:Sin:30,Sin[1]DN:Sin:{d}30,ana[to]PRP:ana:a-na,ana[to]PRP:ana:ana,aš,aš₂,ia,ia₂,li,libbu[interior]N:libbi:ŠA₃,...,ša,ša[of]DET:ša:ša,ša[of]DET:ša:ša₂,šarru[king]N:šarru:LUGAL,šarru[king]N:šarru:MAN,ša₂,šu,šulmu[completeness]N:šulmu:DI-mu,šulmu[completeness]N:šulmu:šul-mu,šu₂
P236880,1.000000,0.000000,1.000000,0.000000,0.259829,0.740171,0.712805,0.287195,0.414361,0.000000,...,0.000000,0.000000,1.000000,1.000000,0.000000,1.000000,0.000000,0.74928,0.25072,1.000000
P236907,0.692093,0.307907,0.333333,0.666667,0.000000,1.000000,0.250000,0.750000,1.000000,1.000000,...,0.000000,0.000000,1.000000,1.000000,0.000000,1.000000,0.000000,0.74928,0.25072,1.000000
P236915,1.000000,0.000000,0.835160,0.164840,0.259829,0.740171,0.712805,0.287195,0.414361,0.531394,...,0.000000,0.000000,1.000000,1.000000,0.000000,1.000000,0.000000,0.74928,0.25072,1.000000
P236916,0.692093,0.307907,1.000000,0.000000,0.259829,0.740171,1.000000,0.000000,0.000000,0.531394,...,0.000000,0.000000,1.000000,0.932367,0.067633,1.000000,0.000000,0.74928,0.25072,1.000000
P236917,1.000000,0.000000,0.000000,1.000000,0.259829,0.740171,0.712805,0.287195,0.414361,1.000000,...,0.000000,0.000000,1.000000,1.000000,0.000000,1.000000,0.000000,0.74928,0.25072,1.000000
P236918,1.000000,0.000000,1.000000,0.000000,0.259829,0.740171,0.712805,0.287195,0.414361,0.531394,...,0.250000,0.333333,0.666667,0.932367,0.067633,0.750000,0.500000,0.74928,0.25072,0.500000
P236919,0.666667,0.333333,0.835160,0.164840,0.259829,0.740171,0.712805,0.287195,0.414361,0.531394,...,0.000000,0.000000,1.000000,0.932367,0.067633,1.000000,0.282997,0.74928,0.25072,0.717003
P236920,1.000000,0.000000,0.500000,0.500000,0.259829,0.740171,1.000000,0.000000,0.000000,1.000000,...,0.000000,0.000000,1.000000,1.000000,0.000000,1.000000,0.125000,0.74928,0.25072,0.875000
P236921,0.800000,0.200000,0.835160,0.164840,0.259829,0.740171,0.712805,0.287195,0.414361,1.000000,...,0.475785,0.543754,0.456246,1.000000,0.000000,0.524215,0.000000,0.74928,0.25072,1.000000
P236922,1.000000,0.000000,0.835160,0.164840,0.259829,0.740171,0.712805,0.287195,0.414361,0.531394,...,1.000000,1.000000,0.000000,0.932367,0.067633,0.000000,0.282997,0.74928,0.25072,0.717003


In [238]:
cat_file08 = codecs.open('saa08/catalogue.json','r','utf-8')
cat_file10 = codecs.open('saa10/catalogue.json','r','utf-8')
cat_json08 = json.load(cat_file08)
cat_json10 = json.load(cat_file10)

class_l = []
class_index = []
for pnum in cat_json08['members']:
    id_text        = cat_json08['members'][pnum].get('id_text','')
    designation    = cat_json08['members'][pnum].get('designation','')
    ancient_author = cat_json08['members'][pnum].get('ancient_author','')
    class_d = {'designation': designation,'ancient_author':ancient_author}
    class_index.append(id_text)
    class_l.append(class_d)

for pnum in cat_json10['members']:
    id_text        = cat_json10['members'][pnum].get('id_text','')
    designation    = cat_json10['members'][pnum].get('designation','')
    ancient_author = cat_json10['members'][pnum].get('ancient_author','')
    class_d = {'designation': designation,'ancient_author':ancient_author}
    class_index.append(id_text)
    class_l.append(class_d)

df_class_las = pd.DataFrame(class_l,index=class_index)
df_class_las

Unnamed: 0,ancient_author,designation
P236880,Ašaredu the Older,SAA 08 327
P236907,Zakir,SAA 08 309
P236915,Nabu-šuma-iškun,SAA 08 376
P236916,Ašaredu the Younger,SAA 08 338
P236917,Zakir,SAA 08 303
P236918,Šumaya,SAA 08 499
P236919,Ašaredu the Younger,SAA 08 345
P236920,(unassigned),SAA 08 505
P236921,(unassigned),SAA 08 508
P236922,Nabu-šuma-iškun,SAA 08 375


In [302]:
df_author_certain = df_class_las[(df_class_las['ancient_author'] != '(unassigned)')]

list_author_certain = list(df_author_certain.index)

author_tot = len(df_author_certain['ancient_author'].unique())
#author_tot

tm_author_certain = tm_dist_all.loc[list(set(list_author_certain) & set(list(tm_dist_all.index)))]
#tm_author_certain
print('Total Authors: ' + str(author_tot) + ' in ' + str(len(tm_author_certain.index)) + ' Texts')

Total Authors: 100 in 800 Texts


In [272]:
tm_counts_class_las = pd.concat([tm_las_counts[list_features_all],df_class_las],axis=1)
tm_counts_class_las.to_csv('output/tm_counts_las.csv',encoding='utf-8')

tm_counts_class_author = pd.DataFrame(tm_counts_class_las.groupby('ancient_author').agg('sum'))[list_features_all]
tm_counts_class_author.to_csv('output/tm_counts_author_las.csv',encoding='utf-8')

In [291]:
import itertools
list_power = p(list_ortho_word_pairs)
list_power = [list(itertools.chain.from_iterable(x)) for x in list_power if x != []]
len(list_power)

1023

In [306]:
evaluate_clusters(tm_author_certain,list_power,'ortho_word_las','ancient_author',author_tot,df_class_las)

1. Evaluating: ['Sin[1]DN:Sin:30', 'Sin[1]DN:Sin:{d}30']
2. Evaluating: ['ana[to]PRP:ana:a-na', 'ana[to]PRP:ana:ana']
3. Evaluating: ['Sin[1]DN:Sin:30', 'Sin[1]DN:Sin:{d}30', 'ana[to]PRP:ana:a-na', 'ana[to]PRP:ana:ana']
4. Evaluating: ['libbu[interior]N:libbi:ŠA₃', 'libbu[interior]N:libbi:ŠA₃-bi']
5. Evaluating: ['Sin[1]DN:Sin:30', 'Sin[1]DN:Sin:{d}30', 'libbu[interior]N:libbi:ŠA₃', 'libbu[interior]N:libbi:ŠA₃-bi']
6. Evaluating: ['ana[to]PRP:ana:a-na', 'ana[to]PRP:ana:ana', 'libbu[interior]N:libbi:ŠA₃', 'libbu[interior]N:libbi:ŠA₃-bi']
7. Evaluating: ['Sin[1]DN:Sin:30', 'Sin[1]DN:Sin:{d}30', 'ana[to]PRP:ana:a-na', 'ana[to]PRP:ana:ana', 'libbu[interior]N:libbi:ŠA₃', 'libbu[interior]N:libbi:ŠA₃-bi']
8. Evaluating: ['lā[not]MOD:lā:la', 'lā[not]MOD:lā:NU']
9. Evaluating: ['Sin[1]DN:Sin:30', 'Sin[1]DN:Sin:{d}30', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:NU']
10. Evaluating: ['ana[to]PRP:ana:a-na', 'ana[to]PRP:ana:ana', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:NU']
11. Evaluating: ['Sin[1]DN:Sin:30', '

56. Evaluating: ['lā[not]MOD:lā:la', 'lā[not]MOD:lā:NU', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u', 'muhhu[skull]N:muhhi:UGU', 'muhhu[skull]N:muhhi:UGU-hi']
57. Evaluating: ['Sin[1]DN:Sin:30', 'Sin[1]DN:Sin:{d}30', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:NU', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u', 'muhhu[skull]N:muhhi:UGU', 'muhhu[skull]N:muhhi:UGU-hi']
58. Evaluating: ['ana[to]PRP:ana:a-na', 'ana[to]PRP:ana:ana', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:NU', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u', 'muhhu[skull]N:muhhi:UGU', 'muhhu[skull]N:muhhi:UGU-hi']
59. Evaluating: ['Sin[1]DN:Sin:30', 'Sin[1]DN:Sin:{d}30', 'ana[to]PRP:ana:a-na', 'ana[to]PRP:ana:ana', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:NU', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u', 'muhhu[skull]N:muhhi:UGU', 'muhhu[skull]N:muhhi:UGU-hi']
60. Evaluating: ['libbu[interior]N:libbi:ŠA₃', 'libbu[interior]N:libbi:ŠA₃-bi', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:NU', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u', 'muhhu[skull]N:muhhi:UGU', 'muhhu[skull]N:

102. Evaluating: ['ana[to]PRP:ana:a-na', 'ana[to]PRP:ana:ana', 'libbu[interior]N:libbi:ŠA₃', 'libbu[interior]N:libbi:ŠA₃-bi', 'muhhu[skull]N:muhhi:UGU', 'muhhu[skull]N:muhhi:UGU-hi', 'u[and]CNJ:u:u', 'u[and]CNJ:u:u₃']
103. Evaluating: ['Sin[1]DN:Sin:30', 'Sin[1]DN:Sin:{d}30', 'ana[to]PRP:ana:a-na', 'ana[to]PRP:ana:ana', 'libbu[interior]N:libbi:ŠA₃', 'libbu[interior]N:libbi:ŠA₃-bi', 'muhhu[skull]N:muhhi:UGU', 'muhhu[skull]N:muhhi:UGU-hi', 'u[and]CNJ:u:u', 'u[and]CNJ:u:u₃']
104. Evaluating: ['lā[not]MOD:lā:la', 'lā[not]MOD:lā:NU', 'muhhu[skull]N:muhhi:UGU', 'muhhu[skull]N:muhhi:UGU-hi', 'u[and]CNJ:u:u', 'u[and]CNJ:u:u₃']
105. Evaluating: ['Sin[1]DN:Sin:30', 'Sin[1]DN:Sin:{d}30', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:NU', 'muhhu[skull]N:muhhi:UGU', 'muhhu[skull]N:muhhi:UGU-hi', 'u[and]CNJ:u:u', 'u[and]CNJ:u:u₃']
106. Evaluating: ['ana[to]PRP:ana:a-na', 'ana[to]PRP:ana:ana', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:NU', 'muhhu[skull]N:muhhi:UGU', 'muhhu[skull]N:muhhi:UGU-hi', 'u[and]CNJ:u:u', 'u[an

143. Evaluating: ['Sin[1]DN:Sin:30', 'Sin[1]DN:Sin:{d}30', 'ana[to]PRP:ana:a-na', 'ana[to]PRP:ana:ana', 'libbu[interior]N:libbi:ŠA₃', 'libbu[interior]N:libbi:ŠA₃-bi', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:NU', 'ša[of]DET:ša:ša', 'ša[of]DET:ša:ša₂']
144. Evaluating: ['lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u', 'ša[of]DET:ša:ša', 'ša[of]DET:ša:ša₂']
145. Evaluating: ['Sin[1]DN:Sin:30', 'Sin[1]DN:Sin:{d}30', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u', 'ša[of]DET:ša:ša', 'ša[of]DET:ša:ša₂']
146. Evaluating: ['ana[to]PRP:ana:a-na', 'ana[to]PRP:ana:ana', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u', 'ša[of]DET:ša:ša', 'ša[of]DET:ša:ša₂']
147. Evaluating: ['Sin[1]DN:Sin:30', 'Sin[1]DN:Sin:{d}30', 'ana[to]PRP:ana:a-na', 'ana[to]PRP:ana:ana', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u', 'ša[of]DET:ša:ša', 'ša[of]DET:ša:ša₂']
148. Evaluating: ['libbu[interior]N:libbi:ŠA₃', 'libbu[interior]N:libbi:ŠA₃-bi', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u', 'ša[of]DET:ša:ša', 'ša[of]DET:ša:ša₂']
149. Evaluating: ['Sin[1

183. Evaluating: ['Sin[1]DN:Sin:30', 'Sin[1]DN:Sin:{d}30', 'ana[to]PRP:ana:a-na', 'ana[to]PRP:ana:ana', 'libbu[interior]N:libbi:ŠA₃', 'libbu[interior]N:libbi:ŠA₃-bi', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u', 'muhhu[skull]N:muhhi:UGU', 'muhhu[skull]N:muhhi:UGU-hi', 'ša[of]DET:ša:ša', 'ša[of]DET:ša:ša₂']
184. Evaluating: ['lā[not]MOD:lā:la', 'lā[not]MOD:lā:NU', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u', 'muhhu[skull]N:muhhi:UGU', 'muhhu[skull]N:muhhi:UGU-hi', 'ša[of]DET:ša:ša', 'ša[of]DET:ša:ša₂']
185. Evaluating: ['Sin[1]DN:Sin:30', 'Sin[1]DN:Sin:{d}30', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:NU', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u', 'muhhu[skull]N:muhhi:UGU', 'muhhu[skull]N:muhhi:UGU-hi', 'ša[of]DET:ša:ša', 'ša[of]DET:ša:ša₂']
186. Evaluating: ['ana[to]PRP:ana:a-na', 'ana[to]PRP:ana:ana', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:NU', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u', 'muhhu[skull]N:muhhi:UGU', 'muhhu[skull]N:muhhi:UGU-hi', 'ša[of]DET:ša:ša', 'ša[of]DET:ša:ša₂']
187. Evaluating: ['Si

221. Evaluating: ['Sin[1]DN:Sin:30', 'Sin[1]DN:Sin:{d}30', 'libbu[interior]N:libbi:ŠA₃', 'libbu[interior]N:libbi:ŠA₃-bi', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:NU', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u', 'u[and]CNJ:u:u', 'u[and]CNJ:u:u₃', 'ša[of]DET:ša:ša', 'ša[of]DET:ša:ša₂']
222. Evaluating: ['ana[to]PRP:ana:a-na', 'ana[to]PRP:ana:ana', 'libbu[interior]N:libbi:ŠA₃', 'libbu[interior]N:libbi:ŠA₃-bi', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:NU', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u', 'u[and]CNJ:u:u', 'u[and]CNJ:u:u₃', 'ša[of]DET:ša:ša', 'ša[of]DET:ša:ša₂']
223. Evaluating: ['Sin[1]DN:Sin:30', 'Sin[1]DN:Sin:{d}30', 'ana[to]PRP:ana:a-na', 'ana[to]PRP:ana:ana', 'libbu[interior]N:libbi:ŠA₃', 'libbu[interior]N:libbi:ŠA₃-bi', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:NU', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u', 'u[and]CNJ:u:u', 'u[and]CNJ:u:u₃', 'ša[of]DET:ša:ša', 'ša[of]DET:ša:ša₂']
224. Evaluating: ['muhhu[skull]N:muhhi:UGU', 'muhhu[skull]N:muhhi:UGU-hi', 'u[and]CNJ:u:u', 'u[and]CNJ:u:u₃', 'ša[of]

253. Evaluating: ['Sin[1]DN:Sin:30', 'Sin[1]DN:Sin:{d}30', 'libbu[interior]N:libbi:ŠA₃', 'libbu[interior]N:libbi:ŠA₃-bi', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:NU', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u', 'muhhu[skull]N:muhhi:UGU', 'muhhu[skull]N:muhhi:UGU-hi', 'u[and]CNJ:u:u', 'u[and]CNJ:u:u₃', 'ša[of]DET:ša:ša', 'ša[of]DET:ša:ša₂']
254. Evaluating: ['ana[to]PRP:ana:a-na', 'ana[to]PRP:ana:ana', 'libbu[interior]N:libbi:ŠA₃', 'libbu[interior]N:libbi:ŠA₃-bi', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:NU', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u', 'muhhu[skull]N:muhhi:UGU', 'muhhu[skull]N:muhhi:UGU-hi', 'u[and]CNJ:u:u', 'u[and]CNJ:u:u₃', 'ša[of]DET:ša:ša', 'ša[of]DET:ša:ša₂']
255. Evaluating: ['Sin[1]DN:Sin:30', 'Sin[1]DN:Sin:{d}30', 'ana[to]PRP:ana:a-na', 'ana[to]PRP:ana:ana', 'libbu[interior]N:libbi:ŠA₃', 'libbu[interior]N:libbi:ŠA₃-bi', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:NU', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u', 'muhhu[skull]N:muhhi:UGU', 'muhhu[skull]N:muhhi:UGU-hi', 'u[and]CNJ:u:u', 'u[

292. Evaluating: ['libbu[interior]N:libbi:ŠA₃', 'libbu[interior]N:libbi:ŠA₃-bi', 'muhhu[skull]N:muhhi:UGU', 'muhhu[skull]N:muhhi:UGU-hi', 'šulmu[completeness]N:šulmu:DI-mu', 'šulmu[completeness]N:šulmu:šul-mu']
293. Evaluating: ['Sin[1]DN:Sin:30', 'Sin[1]DN:Sin:{d}30', 'libbu[interior]N:libbi:ŠA₃', 'libbu[interior]N:libbi:ŠA₃-bi', 'muhhu[skull]N:muhhi:UGU', 'muhhu[skull]N:muhhi:UGU-hi', 'šulmu[completeness]N:šulmu:DI-mu', 'šulmu[completeness]N:šulmu:šul-mu']
294. Evaluating: ['ana[to]PRP:ana:a-na', 'ana[to]PRP:ana:ana', 'libbu[interior]N:libbi:ŠA₃', 'libbu[interior]N:libbi:ŠA₃-bi', 'muhhu[skull]N:muhhi:UGU', 'muhhu[skull]N:muhhi:UGU-hi', 'šulmu[completeness]N:šulmu:DI-mu', 'šulmu[completeness]N:šulmu:šul-mu']
295. Evaluating: ['Sin[1]DN:Sin:30', 'Sin[1]DN:Sin:{d}30', 'ana[to]PRP:ana:a-na', 'ana[to]PRP:ana:ana', 'libbu[interior]N:libbi:ŠA₃', 'libbu[interior]N:libbi:ŠA₃-bi', 'muhhu[skull]N:muhhi:UGU', 'muhhu[skull]N:muhhi:UGU-hi', 'šulmu[completeness]N:šulmu:DI-mu', 'šulmu[completeness]N

324. Evaluating: ['libbu[interior]N:libbi:ŠA₃', 'libbu[interior]N:libbi:ŠA₃-bi', 'u[and]CNJ:u:u', 'u[and]CNJ:u:u₃', 'šulmu[completeness]N:šulmu:DI-mu', 'šulmu[completeness]N:šulmu:šul-mu']
325. Evaluating: ['Sin[1]DN:Sin:30', 'Sin[1]DN:Sin:{d}30', 'libbu[interior]N:libbi:ŠA₃', 'libbu[interior]N:libbi:ŠA₃-bi', 'u[and]CNJ:u:u', 'u[and]CNJ:u:u₃', 'šulmu[completeness]N:šulmu:DI-mu', 'šulmu[completeness]N:šulmu:šul-mu']
326. Evaluating: ['ana[to]PRP:ana:a-na', 'ana[to]PRP:ana:ana', 'libbu[interior]N:libbi:ŠA₃', 'libbu[interior]N:libbi:ŠA₃-bi', 'u[and]CNJ:u:u', 'u[and]CNJ:u:u₃', 'šulmu[completeness]N:šulmu:DI-mu', 'šulmu[completeness]N:šulmu:šul-mu']
327. Evaluating: ['Sin[1]DN:Sin:30', 'Sin[1]DN:Sin:{d}30', 'ana[to]PRP:ana:a-na', 'ana[to]PRP:ana:ana', 'libbu[interior]N:libbi:ŠA₃', 'libbu[interior]N:libbi:ŠA₃-bi', 'u[and]CNJ:u:u', 'u[and]CNJ:u:u₃', 'šulmu[completeness]N:šulmu:DI-mu', 'šulmu[completeness]N:šulmu:šul-mu']
328. Evaluating: ['lā[not]MOD:lā:la', 'lā[not]MOD:lā:NU', 'u[and]CNJ:u:u

357. Evaluating: ['Sin[1]DN:Sin:30', 'Sin[1]DN:Sin:{d}30', 'libbu[interior]N:libbi:ŠA₃', 'libbu[interior]N:libbi:ŠA₃-bi', 'muhhu[skull]N:muhhi:UGU', 'muhhu[skull]N:muhhi:UGU-hi', 'u[and]CNJ:u:u', 'u[and]CNJ:u:u₃', 'šulmu[completeness]N:šulmu:DI-mu', 'šulmu[completeness]N:šulmu:šul-mu']
358. Evaluating: ['ana[to]PRP:ana:a-na', 'ana[to]PRP:ana:ana', 'libbu[interior]N:libbi:ŠA₃', 'libbu[interior]N:libbi:ŠA₃-bi', 'muhhu[skull]N:muhhi:UGU', 'muhhu[skull]N:muhhi:UGU-hi', 'u[and]CNJ:u:u', 'u[and]CNJ:u:u₃', 'šulmu[completeness]N:šulmu:DI-mu', 'šulmu[completeness]N:šulmu:šul-mu']
359. Evaluating: ['Sin[1]DN:Sin:30', 'Sin[1]DN:Sin:{d}30', 'ana[to]PRP:ana:a-na', 'ana[to]PRP:ana:ana', 'libbu[interior]N:libbi:ŠA₃', 'libbu[interior]N:libbi:ŠA₃-bi', 'muhhu[skull]N:muhhi:UGU', 'muhhu[skull]N:muhhi:UGU-hi', 'u[and]CNJ:u:u', 'u[and]CNJ:u:u₃', 'šulmu[completeness]N:šulmu:DI-mu', 'šulmu[completeness]N:šulmu:šul-mu']
360. Evaluating: ['lā[not]MOD:lā:la', 'lā[not]MOD:lā:NU', 'muhhu[skull]N:muhhi:UGU', 'muhh

384. Evaluating: ['ša[of]DET:ša:ša', 'ša[of]DET:ša:ša₂', 'šulmu[completeness]N:šulmu:DI-mu', 'šulmu[completeness]N:šulmu:šul-mu']
385. Evaluating: ['Sin[1]DN:Sin:30', 'Sin[1]DN:Sin:{d}30', 'ša[of]DET:ša:ša', 'ša[of]DET:ša:ša₂', 'šulmu[completeness]N:šulmu:DI-mu', 'šulmu[completeness]N:šulmu:šul-mu']
386. Evaluating: ['ana[to]PRP:ana:a-na', 'ana[to]PRP:ana:ana', 'ša[of]DET:ša:ša', 'ša[of]DET:ša:ša₂', 'šulmu[completeness]N:šulmu:DI-mu', 'šulmu[completeness]N:šulmu:šul-mu']
387. Evaluating: ['Sin[1]DN:Sin:30', 'Sin[1]DN:Sin:{d}30', 'ana[to]PRP:ana:a-na', 'ana[to]PRP:ana:ana', 'ša[of]DET:ša:ša', 'ša[of]DET:ša:ša₂', 'šulmu[completeness]N:šulmu:DI-mu', 'šulmu[completeness]N:šulmu:šul-mu']
388. Evaluating: ['libbu[interior]N:libbi:ŠA₃', 'libbu[interior]N:libbi:ŠA₃-bi', 'ša[of]DET:ša:ša', 'ša[of]DET:ša:ša₂', 'šulmu[completeness]N:šulmu:DI-mu', 'šulmu[completeness]N:šulmu:šul-mu']
389. Evaluating: ['Sin[1]DN:Sin:30', 'Sin[1]DN:Sin:{d}30', 'libbu[interior]N:libbi:ŠA₃', 'libbu[interior]N:libbi:ŠA

418. Evaluating: ['ana[to]PRP:ana:a-na', 'ana[to]PRP:ana:ana', 'muhhu[skull]N:muhhi:UGU', 'muhhu[skull]N:muhhi:UGU-hi', 'ša[of]DET:ša:ša', 'ša[of]DET:ša:ša₂', 'šulmu[completeness]N:šulmu:DI-mu', 'šulmu[completeness]N:šulmu:šul-mu']
419. Evaluating: ['Sin[1]DN:Sin:30', 'Sin[1]DN:Sin:{d}30', 'ana[to]PRP:ana:a-na', 'ana[to]PRP:ana:ana', 'muhhu[skull]N:muhhi:UGU', 'muhhu[skull]N:muhhi:UGU-hi', 'ša[of]DET:ša:ša', 'ša[of]DET:ša:ša₂', 'šulmu[completeness]N:šulmu:DI-mu', 'šulmu[completeness]N:šulmu:šul-mu']
420. Evaluating: ['libbu[interior]N:libbi:ŠA₃', 'libbu[interior]N:libbi:ŠA₃-bi', 'muhhu[skull]N:muhhi:UGU', 'muhhu[skull]N:muhhi:UGU-hi', 'ša[of]DET:ša:ša', 'ša[of]DET:ša:ša₂', 'šulmu[completeness]N:šulmu:DI-mu', 'šulmu[completeness]N:šulmu:šul-mu']
421. Evaluating: ['Sin[1]DN:Sin:30', 'Sin[1]DN:Sin:{d}30', 'libbu[interior]N:libbi:ŠA₃', 'libbu[interior]N:libbi:ŠA₃-bi', 'muhhu[skull]N:muhhi:UGU', 'muhhu[skull]N:muhhi:UGU-hi', 'ša[of]DET:ša:ša', 'ša[of]DET:ša:ša₂', 'šulmu[completeness]N:šulmu

446. Evaluating: ['ana[to]PRP:ana:a-na', 'ana[to]PRP:ana:ana', 'libbu[interior]N:libbi:ŠA₃', 'libbu[interior]N:libbi:ŠA₃-bi', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:NU', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u', 'muhhu[skull]N:muhhi:UGU', 'muhhu[skull]N:muhhi:UGU-hi', 'ša[of]DET:ša:ša', 'ša[of]DET:ša:ša₂', 'šulmu[completeness]N:šulmu:DI-mu', 'šulmu[completeness]N:šulmu:šul-mu']
447. Evaluating: ['Sin[1]DN:Sin:30', 'Sin[1]DN:Sin:{d}30', 'ana[to]PRP:ana:a-na', 'ana[to]PRP:ana:ana', 'libbu[interior]N:libbi:ŠA₃', 'libbu[interior]N:libbi:ŠA₃-bi', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:NU', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u', 'muhhu[skull]N:muhhi:UGU', 'muhhu[skull]N:muhhi:UGU-hi', 'ša[of]DET:ša:ša', 'ša[of]DET:ša:ša₂', 'šulmu[completeness]N:šulmu:DI-mu', 'šulmu[completeness]N:šulmu:šul-mu']
448. Evaluating: ['u[and]CNJ:u:u', 'u[and]CNJ:u:u₃', 'ša[of]DET:ša:ša', 'ša[of]DET:ša:ša₂', 'šulmu[completeness]N:šulmu:DI-mu', 'šulmu[completeness]N:šulmu:šul-mu']
449. Evaluating: ['Sin[1]DN:Sin:30', 'Si

476. Evaluating: ['libbu[interior]N:libbi:ŠA₃', 'libbu[interior]N:libbi:ŠA₃-bi', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:NU', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u', 'u[and]CNJ:u:u', 'u[and]CNJ:u:u₃', 'ša[of]DET:ša:ša', 'ša[of]DET:ša:ša₂', 'šulmu[completeness]N:šulmu:DI-mu', 'šulmu[completeness]N:šulmu:šul-mu']
477. Evaluating: ['Sin[1]DN:Sin:30', 'Sin[1]DN:Sin:{d}30', 'libbu[interior]N:libbi:ŠA₃', 'libbu[interior]N:libbi:ŠA₃-bi', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:NU', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u', 'u[and]CNJ:u:u', 'u[and]CNJ:u:u₃', 'ša[of]DET:ša:ša', 'ša[of]DET:ša:ša₂', 'šulmu[completeness]N:šulmu:DI-mu', 'šulmu[completeness]N:šulmu:šul-mu']
478. Evaluating: ['ana[to]PRP:ana:a-na', 'ana[to]PRP:ana:ana', 'libbu[interior]N:libbi:ŠA₃', 'libbu[interior]N:libbi:ŠA₃-bi', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:NU', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u', 'u[and]CNJ:u:u', 'u[and]CNJ:u:u₃', 'ša[of]DET:ša:ša', 'ša[of]DET:ša:ša₂', 'šulmu[completeness]N:šulmu:DI-mu', 'šulmu[completeness]N

502. Evaluating: ['ana[to]PRP:ana:a-na', 'ana[to]PRP:ana:ana', 'libbu[interior]N:libbi:ŠA₃', 'libbu[interior]N:libbi:ŠA₃-bi', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u', 'muhhu[skull]N:muhhi:UGU', 'muhhu[skull]N:muhhi:UGU-hi', 'u[and]CNJ:u:u', 'u[and]CNJ:u:u₃', 'ša[of]DET:ša:ša', 'ša[of]DET:ša:ša₂', 'šulmu[completeness]N:šulmu:DI-mu', 'šulmu[completeness]N:šulmu:šul-mu']
503. Evaluating: ['Sin[1]DN:Sin:30', 'Sin[1]DN:Sin:{d}30', 'ana[to]PRP:ana:a-na', 'ana[to]PRP:ana:ana', 'libbu[interior]N:libbi:ŠA₃', 'libbu[interior]N:libbi:ŠA₃-bi', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u', 'muhhu[skull]N:muhhi:UGU', 'muhhu[skull]N:muhhi:UGU-hi', 'u[and]CNJ:u:u', 'u[and]CNJ:u:u₃', 'ša[of]DET:ša:ša', 'ša[of]DET:ša:ša₂', 'šulmu[completeness]N:šulmu:DI-mu', 'šulmu[completeness]N:šulmu:šul-mu']
504. Evaluating: ['lā[not]MOD:lā:la', 'lā[not]MOD:lā:NU', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u', 'muhhu[skull]N:muhhi:UGU', 'muhhu[skull]N:muhhi:UGU-hi', 'u[and]CNJ:u:u', 'u[and]CNJ:u:u₃', 'ša[of]DET:ša:ša', 'ša[

538. Evaluating: ['ana[to]PRP:ana:a-na', 'ana[to]PRP:ana:ana', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:NU', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u', 'šarru[king]N:šarru:LUGAL', 'šarru[king]N:šarru:MAN']
539. Evaluating: ['Sin[1]DN:Sin:30', 'Sin[1]DN:Sin:{d}30', 'ana[to]PRP:ana:a-na', 'ana[to]PRP:ana:ana', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:NU', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u', 'šarru[king]N:šarru:LUGAL', 'šarru[king]N:šarru:MAN']
540. Evaluating: ['libbu[interior]N:libbi:ŠA₃', 'libbu[interior]N:libbi:ŠA₃-bi', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:NU', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u', 'šarru[king]N:šarru:LUGAL', 'šarru[king]N:šarru:MAN']
541. Evaluating: ['Sin[1]DN:Sin:30', 'Sin[1]DN:Sin:{d}30', 'libbu[interior]N:libbi:ŠA₃', 'libbu[interior]N:libbi:ŠA₃-bi', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:NU', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u', 'šarru[king]N:šarru:LUGAL', 'šarru[king]N:šarru:MAN']
542. Evaluating: ['ana[to]PRP:ana:a-na', 'ana[to]PRP:ana:ana', 'libbu[interior]N:libbi

573. Evaluating: ['Sin[1]DN:Sin:30', 'Sin[1]DN:Sin:{d}30', 'libbu[interior]N:libbi:ŠA₃', 'libbu[interior]N:libbi:ŠA₃-bi', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:NU', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u', 'muhhu[skull]N:muhhi:UGU', 'muhhu[skull]N:muhhi:UGU-hi', 'šarru[king]N:šarru:LUGAL', 'šarru[king]N:šarru:MAN']
574. Evaluating: ['ana[to]PRP:ana:a-na', 'ana[to]PRP:ana:ana', 'libbu[interior]N:libbi:ŠA₃', 'libbu[interior]N:libbi:ŠA₃-bi', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:NU', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u', 'muhhu[skull]N:muhhi:UGU', 'muhhu[skull]N:muhhi:UGU-hi', 'šarru[king]N:šarru:LUGAL', 'šarru[king]N:šarru:MAN']
575. Evaluating: ['Sin[1]DN:Sin:30', 'Sin[1]DN:Sin:{d}30', 'ana[to]PRP:ana:a-na', 'ana[to]PRP:ana:ana', 'libbu[interior]N:libbi:ŠA₃', 'libbu[interior]N:libbi:ŠA₃-bi', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:NU', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u', 'muhhu[skull]N:muhhi:UGU', 'muhhu[skull]N:muhhi:UGU-hi', 'šarru[king]N:šarru:LUGAL', 'šarru[king]N:šarru:MAN']
576. E

609. Evaluating: ['Sin[1]DN:Sin:30', 'Sin[1]DN:Sin:{d}30', 'muhhu[skull]N:muhhi:UGU', 'muhhu[skull]N:muhhi:UGU-hi', 'u[and]CNJ:u:u', 'u[and]CNJ:u:u₃', 'šarru[king]N:šarru:LUGAL', 'šarru[king]N:šarru:MAN']
610. Evaluating: ['ana[to]PRP:ana:a-na', 'ana[to]PRP:ana:ana', 'muhhu[skull]N:muhhi:UGU', 'muhhu[skull]N:muhhi:UGU-hi', 'u[and]CNJ:u:u', 'u[and]CNJ:u:u₃', 'šarru[king]N:šarru:LUGAL', 'šarru[king]N:šarru:MAN']
611. Evaluating: ['Sin[1]DN:Sin:30', 'Sin[1]DN:Sin:{d}30', 'ana[to]PRP:ana:a-na', 'ana[to]PRP:ana:ana', 'muhhu[skull]N:muhhi:UGU', 'muhhu[skull]N:muhhi:UGU-hi', 'u[and]CNJ:u:u', 'u[and]CNJ:u:u₃', 'šarru[king]N:šarru:LUGAL', 'šarru[king]N:šarru:MAN']
612. Evaluating: ['libbu[interior]N:libbi:ŠA₃', 'libbu[interior]N:libbi:ŠA₃-bi', 'muhhu[skull]N:muhhi:UGU', 'muhhu[skull]N:muhhi:UGU-hi', 'u[and]CNJ:u:u', 'u[and]CNJ:u:u₃', 'šarru[king]N:šarru:LUGAL', 'šarru[king]N:šarru:MAN']
613. Evaluating: ['Sin[1]DN:Sin:30', 'Sin[1]DN:Sin:{d}30', 'libbu[interior]N:libbi:ŠA₃', 'libbu[interior]N:li

639. Evaluating: ['Sin[1]DN:Sin:30', 'Sin[1]DN:Sin:{d}30', 'ana[to]PRP:ana:a-na', 'ana[to]PRP:ana:ana', 'libbu[interior]N:libbi:ŠA₃', 'libbu[interior]N:libbi:ŠA₃-bi', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:NU', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u', 'muhhu[skull]N:muhhi:UGU', 'muhhu[skull]N:muhhi:UGU-hi', 'u[and]CNJ:u:u', 'u[and]CNJ:u:u₃', 'šarru[king]N:šarru:LUGAL', 'šarru[king]N:šarru:MAN']
640. Evaluating: ['ša[of]DET:ša:ša', 'ša[of]DET:ša:ša₂', 'šarru[king]N:šarru:LUGAL', 'šarru[king]N:šarru:MAN']
641. Evaluating: ['Sin[1]DN:Sin:30', 'Sin[1]DN:Sin:{d}30', 'ša[of]DET:ša:ša', 'ša[of]DET:ša:ša₂', 'šarru[king]N:šarru:LUGAL', 'šarru[king]N:šarru:MAN']
642. Evaluating: ['ana[to]PRP:ana:a-na', 'ana[to]PRP:ana:ana', 'ša[of]DET:ša:ša', 'ša[of]DET:ša:ša₂', 'šarru[king]N:šarru:LUGAL', 'šarru[king]N:šarru:MAN']
643. Evaluating: ['Sin[1]DN:Sin:30', 'Sin[1]DN:Sin:{d}30', 'ana[to]PRP:ana:a-na', 'ana[to]PRP:ana:ana', 'ša[of]DET:ša:ša', 'ša[of]DET:ša:ša₂', 'šarru[king]N:šarru:LUGAL', 'šarru[king]N:š

675. Evaluating: ['Sin[1]DN:Sin:30', 'Sin[1]DN:Sin:{d}30', 'ana[to]PRP:ana:a-na', 'ana[to]PRP:ana:ana', 'muhhu[skull]N:muhhi:UGU', 'muhhu[skull]N:muhhi:UGU-hi', 'ša[of]DET:ša:ša', 'ša[of]DET:ša:ša₂', 'šarru[king]N:šarru:LUGAL', 'šarru[king]N:šarru:MAN']
676. Evaluating: ['libbu[interior]N:libbi:ŠA₃', 'libbu[interior]N:libbi:ŠA₃-bi', 'muhhu[skull]N:muhhi:UGU', 'muhhu[skull]N:muhhi:UGU-hi', 'ša[of]DET:ša:ša', 'ša[of]DET:ša:ša₂', 'šarru[king]N:šarru:LUGAL', 'šarru[king]N:šarru:MAN']
677. Evaluating: ['Sin[1]DN:Sin:30', 'Sin[1]DN:Sin:{d}30', 'libbu[interior]N:libbi:ŠA₃', 'libbu[interior]N:libbi:ŠA₃-bi', 'muhhu[skull]N:muhhi:UGU', 'muhhu[skull]N:muhhi:UGU-hi', 'ša[of]DET:ša:ša', 'ša[of]DET:ša:ša₂', 'šarru[king]N:šarru:LUGAL', 'šarru[king]N:šarru:MAN']
678. Evaluating: ['ana[to]PRP:ana:a-na', 'ana[to]PRP:ana:ana', 'libbu[interior]N:libbi:ŠA₃', 'libbu[interior]N:libbi:ŠA₃-bi', 'muhhu[skull]N:muhhi:UGU', 'muhhu[skull]N:muhhi:UGU-hi', 'ša[of]DET:ša:ša', 'ša[of]DET:ša:ša₂', 'šarru[king]N:šarru:L

704. Evaluating: ['u[and]CNJ:u:u', 'u[and]CNJ:u:u₃', 'ša[of]DET:ša:ša', 'ša[of]DET:ša:ša₂', 'šarru[king]N:šarru:LUGAL', 'šarru[king]N:šarru:MAN']
705. Evaluating: ['Sin[1]DN:Sin:30', 'Sin[1]DN:Sin:{d}30', 'u[and]CNJ:u:u', 'u[and]CNJ:u:u₃', 'ša[of]DET:ša:ša', 'ša[of]DET:ša:ša₂', 'šarru[king]N:šarru:LUGAL', 'šarru[king]N:šarru:MAN']
706. Evaluating: ['ana[to]PRP:ana:a-na', 'ana[to]PRP:ana:ana', 'u[and]CNJ:u:u', 'u[and]CNJ:u:u₃', 'ša[of]DET:ša:ša', 'ša[of]DET:ša:ša₂', 'šarru[king]N:šarru:LUGAL', 'šarru[king]N:šarru:MAN']
707. Evaluating: ['Sin[1]DN:Sin:30', 'Sin[1]DN:Sin:{d}30', 'ana[to]PRP:ana:a-na', 'ana[to]PRP:ana:ana', 'u[and]CNJ:u:u', 'u[and]CNJ:u:u₃', 'ša[of]DET:ša:ša', 'ša[of]DET:ša:ša₂', 'šarru[king]N:šarru:LUGAL', 'šarru[king]N:šarru:MAN']
708. Evaluating: ['libbu[interior]N:libbi:ŠA₃', 'libbu[interior]N:libbi:ŠA₃-bi', 'u[and]CNJ:u:u', 'u[and]CNJ:u:u₃', 'ša[of]DET:ša:ša', 'ša[of]DET:ša:ša₂', 'šarru[king]N:šarru:LUGAL', 'šarru[king]N:šarru:MAN']
709. Evaluating: ['Sin[1]DN:Sin:30'

736. Evaluating: ['muhhu[skull]N:muhhi:UGU', 'muhhu[skull]N:muhhi:UGU-hi', 'u[and]CNJ:u:u', 'u[and]CNJ:u:u₃', 'ša[of]DET:ša:ša', 'ša[of]DET:ša:ša₂', 'šarru[king]N:šarru:LUGAL', 'šarru[king]N:šarru:MAN']
737. Evaluating: ['Sin[1]DN:Sin:30', 'Sin[1]DN:Sin:{d}30', 'muhhu[skull]N:muhhi:UGU', 'muhhu[skull]N:muhhi:UGU-hi', 'u[and]CNJ:u:u', 'u[and]CNJ:u:u₃', 'ša[of]DET:ša:ša', 'ša[of]DET:ša:ša₂', 'šarru[king]N:šarru:LUGAL', 'šarru[king]N:šarru:MAN']
738. Evaluating: ['ana[to]PRP:ana:a-na', 'ana[to]PRP:ana:ana', 'muhhu[skull]N:muhhi:UGU', 'muhhu[skull]N:muhhi:UGU-hi', 'u[and]CNJ:u:u', 'u[and]CNJ:u:u₃', 'ša[of]DET:ša:ša', 'ša[of]DET:ša:ša₂', 'šarru[king]N:šarru:LUGAL', 'šarru[king]N:šarru:MAN']
739. Evaluating: ['Sin[1]DN:Sin:30', 'Sin[1]DN:Sin:{d}30', 'ana[to]PRP:ana:a-na', 'ana[to]PRP:ana:ana', 'muhhu[skull]N:muhhi:UGU', 'muhhu[skull]N:muhhi:UGU-hi', 'u[and]CNJ:u:u', 'u[and]CNJ:u:u₃', 'ša[of]DET:ša:ša', 'ša[of]DET:ša:ša₂', 'šarru[king]N:šarru:LUGAL', 'šarru[king]N:šarru:MAN']
740. Evaluating:

763. Evaluating: ['Sin[1]DN:Sin:30', 'Sin[1]DN:Sin:{d}30', 'ana[to]PRP:ana:a-na', 'ana[to]PRP:ana:ana', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:NU', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u', 'muhhu[skull]N:muhhi:UGU', 'muhhu[skull]N:muhhi:UGU-hi', 'u[and]CNJ:u:u', 'u[and]CNJ:u:u₃', 'ša[of]DET:ša:ša', 'ša[of]DET:ša:ša₂', 'šarru[king]N:šarru:LUGAL', 'šarru[king]N:šarru:MAN']
764. Evaluating: ['libbu[interior]N:libbi:ŠA₃', 'libbu[interior]N:libbi:ŠA₃-bi', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:NU', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u', 'muhhu[skull]N:muhhi:UGU', 'muhhu[skull]N:muhhi:UGU-hi', 'u[and]CNJ:u:u', 'u[and]CNJ:u:u₃', 'ša[of]DET:ša:ša', 'ša[of]DET:ša:ša₂', 'šarru[king]N:šarru:LUGAL', 'šarru[king]N:šarru:MAN']
765. Evaluating: ['Sin[1]DN:Sin:30', 'Sin[1]DN:Sin:{d}30', 'libbu[interior]N:libbi:ŠA₃', 'libbu[interior]N:libbi:ŠA₃-bi', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:NU', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u', 'muhhu[skull]N:muhhi:UGU', 'muhhu[skull]N:muhhi:UGU-hi', 'u[and]CNJ:u:u', 'u[

794. Evaluating: ['ana[to]PRP:ana:a-na', 'ana[to]PRP:ana:ana', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:NU', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u', 'šulmu[completeness]N:šulmu:DI-mu', 'šulmu[completeness]N:šulmu:šul-mu', 'šarru[king]N:šarru:LUGAL', 'šarru[king]N:šarru:MAN']
795. Evaluating: ['Sin[1]DN:Sin:30', 'Sin[1]DN:Sin:{d}30', 'ana[to]PRP:ana:a-na', 'ana[to]PRP:ana:ana', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:NU', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u', 'šulmu[completeness]N:šulmu:DI-mu', 'šulmu[completeness]N:šulmu:šul-mu', 'šarru[king]N:šarru:LUGAL', 'šarru[king]N:šarru:MAN']
796. Evaluating: ['libbu[interior]N:libbi:ŠA₃', 'libbu[interior]N:libbi:ŠA₃-bi', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:NU', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u', 'šulmu[completeness]N:šulmu:DI-mu', 'šulmu[completeness]N:šulmu:šul-mu', 'šarru[king]N:šarru:LUGAL', 'šarru[king]N:šarru:MAN']
797. Evaluating: ['Sin[1]DN:Sin:30', 'Sin[1]DN:Sin:{d}30', 'libbu[interior]N:libbi:ŠA₃', 'libbu[interior]N:libbi:ŠA₃-bi', 'lā

822. Evaluating: ['ana[to]PRP:ana:a-na', 'ana[to]PRP:ana:ana', 'libbu[interior]N:libbi:ŠA₃', 'libbu[interior]N:libbi:ŠA₃-bi', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u', 'muhhu[skull]N:muhhi:UGU', 'muhhu[skull]N:muhhi:UGU-hi', 'šulmu[completeness]N:šulmu:DI-mu', 'šulmu[completeness]N:šulmu:šul-mu', 'šarru[king]N:šarru:LUGAL', 'šarru[king]N:šarru:MAN']
823. Evaluating: ['Sin[1]DN:Sin:30', 'Sin[1]DN:Sin:{d}30', 'ana[to]PRP:ana:a-na', 'ana[to]PRP:ana:ana', 'libbu[interior]N:libbi:ŠA₃', 'libbu[interior]N:libbi:ŠA₃-bi', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u', 'muhhu[skull]N:muhhi:UGU', 'muhhu[skull]N:muhhi:UGU-hi', 'šulmu[completeness]N:šulmu:DI-mu', 'šulmu[completeness]N:šulmu:šul-mu', 'šarru[king]N:šarru:LUGAL', 'šarru[king]N:šarru:MAN']
824. Evaluating: ['lā[not]MOD:lā:la', 'lā[not]MOD:lā:NU', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u', 'muhhu[skull]N:muhhi:UGU', 'muhhu[skull]N:muhhi:UGU-hi', 'šulmu[completeness]N:šulmu:DI-mu', 'šulmu[completeness]N:šulmu:šul-mu', 'šarru[king]N:šarru:LUGAL

849. Evaluating: ['Sin[1]DN:Sin:30', 'Sin[1]DN:Sin:{d}30', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u', 'u[and]CNJ:u:u', 'u[and]CNJ:u:u₃', 'šulmu[completeness]N:šulmu:DI-mu', 'šulmu[completeness]N:šulmu:šul-mu', 'šarru[king]N:šarru:LUGAL', 'šarru[king]N:šarru:MAN']
850. Evaluating: ['ana[to]PRP:ana:a-na', 'ana[to]PRP:ana:ana', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u', 'u[and]CNJ:u:u', 'u[and]CNJ:u:u₃', 'šulmu[completeness]N:šulmu:DI-mu', 'šulmu[completeness]N:šulmu:šul-mu', 'šarru[king]N:šarru:LUGAL', 'šarru[king]N:šarru:MAN']
851. Evaluating: ['Sin[1]DN:Sin:30', 'Sin[1]DN:Sin:{d}30', 'ana[to]PRP:ana:a-na', 'ana[to]PRP:ana:ana', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u', 'u[and]CNJ:u:u', 'u[and]CNJ:u:u₃', 'šulmu[completeness]N:šulmu:DI-mu', 'šulmu[completeness]N:šulmu:šul-mu', 'šarru[king]N:šarru:LUGAL', 'šarru[king]N:šarru:MAN']
852. Evaluating: ['libbu[interior]N:libbi:ŠA₃', 'libbu[interior]N:libbi:ŠA₃-bi', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u', 'u[and]CNJ:u:u', 'u[and]CNJ:u:u₃', 'šul

875. Evaluating: ['Sin[1]DN:Sin:30', 'Sin[1]DN:Sin:{d}30', 'ana[to]PRP:ana:a-na', 'ana[to]PRP:ana:ana', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:NU', 'muhhu[skull]N:muhhi:UGU', 'muhhu[skull]N:muhhi:UGU-hi', 'u[and]CNJ:u:u', 'u[and]CNJ:u:u₃', 'šulmu[completeness]N:šulmu:DI-mu', 'šulmu[completeness]N:šulmu:šul-mu', 'šarru[king]N:šarru:LUGAL', 'šarru[king]N:šarru:MAN']
876. Evaluating: ['libbu[interior]N:libbi:ŠA₃', 'libbu[interior]N:libbi:ŠA₃-bi', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:NU', 'muhhu[skull]N:muhhi:UGU', 'muhhu[skull]N:muhhi:UGU-hi', 'u[and]CNJ:u:u', 'u[and]CNJ:u:u₃', 'šulmu[completeness]N:šulmu:DI-mu', 'šulmu[completeness]N:šulmu:šul-mu', 'šarru[king]N:šarru:LUGAL', 'šarru[king]N:šarru:MAN']
877. Evaluating: ['Sin[1]DN:Sin:30', 'Sin[1]DN:Sin:{d}30', 'libbu[interior]N:libbi:ŠA₃', 'libbu[interior]N:libbi:ŠA₃-bi', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:NU', 'muhhu[skull]N:muhhi:UGU', 'muhhu[skull]N:muhhi:UGU-hi', 'u[and]CNJ:u:u', 'u[and]CNJ:u:u₃', 'šulmu[completeness]N:šulmu:DI-mu', 'šulmu[

898. Evaluating: ['ana[to]PRP:ana:a-na', 'ana[to]PRP:ana:ana', 'ša[of]DET:ša:ša', 'ša[of]DET:ša:ša₂', 'šulmu[completeness]N:šulmu:DI-mu', 'šulmu[completeness]N:šulmu:šul-mu', 'šarru[king]N:šarru:LUGAL', 'šarru[king]N:šarru:MAN']
899. Evaluating: ['Sin[1]DN:Sin:30', 'Sin[1]DN:Sin:{d}30', 'ana[to]PRP:ana:a-na', 'ana[to]PRP:ana:ana', 'ša[of]DET:ša:ša', 'ša[of]DET:ša:ša₂', 'šulmu[completeness]N:šulmu:DI-mu', 'šulmu[completeness]N:šulmu:šul-mu', 'šarru[king]N:šarru:LUGAL', 'šarru[king]N:šarru:MAN']
900. Evaluating: ['libbu[interior]N:libbi:ŠA₃', 'libbu[interior]N:libbi:ŠA₃-bi', 'ša[of]DET:ša:ša', 'ša[of]DET:ša:ša₂', 'šulmu[completeness]N:šulmu:DI-mu', 'šulmu[completeness]N:šulmu:šul-mu', 'šarru[king]N:šarru:LUGAL', 'šarru[king]N:šarru:MAN']
901. Evaluating: ['Sin[1]DN:Sin:30', 'Sin[1]DN:Sin:{d}30', 'libbu[interior]N:libbi:ŠA₃', 'libbu[interior]N:libbi:ŠA₃-bi', 'ša[of]DET:ša:ša', 'ša[of]DET:ša:ša₂', 'šulmu[completeness]N:šulmu:DI-mu', 'šulmu[completeness]N:šulmu:šul-mu', 'šarru[king]N:šarru:

926. Evaluating: ['ana[to]PRP:ana:a-na', 'ana[to]PRP:ana:ana', 'libbu[interior]N:libbi:ŠA₃', 'libbu[interior]N:libbi:ŠA₃-bi', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:NU', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u', 'ša[of]DET:ša:ša', 'ša[of]DET:ša:ša₂', 'šulmu[completeness]N:šulmu:DI-mu', 'šulmu[completeness]N:šulmu:šul-mu', 'šarru[king]N:šarru:LUGAL', 'šarru[king]N:šarru:MAN']
927. Evaluating: ['Sin[1]DN:Sin:30', 'Sin[1]DN:Sin:{d}30', 'ana[to]PRP:ana:a-na', 'ana[to]PRP:ana:ana', 'libbu[interior]N:libbi:ŠA₃', 'libbu[interior]N:libbi:ŠA₃-bi', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:NU', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u', 'ša[of]DET:ša:ša', 'ša[of]DET:ša:ša₂', 'šulmu[completeness]N:šulmu:DI-mu', 'šulmu[completeness]N:šulmu:šul-mu', 'šarru[king]N:šarru:LUGAL', 'šarru[king]N:šarru:MAN']
928. Evaluating: ['muhhu[skull]N:muhhi:UGU', 'muhhu[skull]N:muhhi:UGU-hi', 'ša[of]DET:ša:ša', 'ša[of]DET:ša:ša₂', 'šulmu[completeness]N:šulmu:DI-mu', 'šulmu[completeness]N:šulmu:šul-mu', 'šarru[king]N:šarru:LUGA

950. Evaluating: ['ana[to]PRP:ana:a-na', 'ana[to]PRP:ana:ana', 'libbu[interior]N:libbi:ŠA₃', 'libbu[interior]N:libbi:ŠA₃-bi', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u', 'muhhu[skull]N:muhhi:UGU', 'muhhu[skull]N:muhhi:UGU-hi', 'ša[of]DET:ša:ša', 'ša[of]DET:ša:ša₂', 'šulmu[completeness]N:šulmu:DI-mu', 'šulmu[completeness]N:šulmu:šul-mu', 'šarru[king]N:šarru:LUGAL', 'šarru[king]N:šarru:MAN']
951. Evaluating: ['Sin[1]DN:Sin:30', 'Sin[1]DN:Sin:{d}30', 'ana[to]PRP:ana:a-na', 'ana[to]PRP:ana:ana', 'libbu[interior]N:libbi:ŠA₃', 'libbu[interior]N:libbi:ŠA₃-bi', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u', 'muhhu[skull]N:muhhi:UGU', 'muhhu[skull]N:muhhi:UGU-hi', 'ša[of]DET:ša:ša', 'ša[of]DET:ša:ša₂', 'šulmu[completeness]N:šulmu:DI-mu', 'šulmu[completeness]N:šulmu:šul-mu', 'šarru[king]N:šarru:LUGAL', 'šarru[king]N:šarru:MAN']
952. Evaluating: ['lā[not]MOD:lā:la', 'lā[not]MOD:lā:NU', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u', 'muhhu[skull]N:muhhi:UGU', 'muhhu[skull]N:muhhi:UGU-hi', 'ša[of]DET:ša:ša', '

974. Evaluating: ['ana[to]PRP:ana:a-na', 'ana[to]PRP:ana:ana', 'libbu[interior]N:libbi:ŠA₃', 'libbu[interior]N:libbi:ŠA₃-bi', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:NU', 'u[and]CNJ:u:u', 'u[and]CNJ:u:u₃', 'ša[of]DET:ša:ša', 'ša[of]DET:ša:ša₂', 'šulmu[completeness]N:šulmu:DI-mu', 'šulmu[completeness]N:šulmu:šul-mu', 'šarru[king]N:šarru:LUGAL', 'šarru[king]N:šarru:MAN']
975. Evaluating: ['Sin[1]DN:Sin:30', 'Sin[1]DN:Sin:{d}30', 'ana[to]PRP:ana:a-na', 'ana[to]PRP:ana:ana', 'libbu[interior]N:libbi:ŠA₃', 'libbu[interior]N:libbi:ŠA₃-bi', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:NU', 'u[and]CNJ:u:u', 'u[and]CNJ:u:u₃', 'ša[of]DET:ša:ša', 'ša[of]DET:ša:ša₂', 'šulmu[completeness]N:šulmu:DI-mu', 'šulmu[completeness]N:šulmu:šul-mu', 'šarru[king]N:šarru:LUGAL', 'šarru[king]N:šarru:MAN']
976. Evaluating: ['lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u', 'u[and]CNJ:u:u', 'u[and]CNJ:u:u₃', 'ša[of]DET:ša:ša', 'ša[of]DET:ša:ša₂', 'šulmu[completeness]N:šulmu:DI-mu', 'šulmu[completeness]N:šulmu:šul-mu', 'šarru[king]N:šarr

998. Evaluating: ['ana[to]PRP:ana:a-na', 'ana[to]PRP:ana:ana', 'libbu[interior]N:libbi:ŠA₃', 'libbu[interior]N:libbi:ŠA₃-bi', 'muhhu[skull]N:muhhi:UGU', 'muhhu[skull]N:muhhi:UGU-hi', 'u[and]CNJ:u:u', 'u[and]CNJ:u:u₃', 'ša[of]DET:ša:ša', 'ša[of]DET:ša:ša₂', 'šulmu[completeness]N:šulmu:DI-mu', 'šulmu[completeness]N:šulmu:šul-mu', 'šarru[king]N:šarru:LUGAL', 'šarru[king]N:šarru:MAN']
999. Evaluating: ['Sin[1]DN:Sin:30', 'Sin[1]DN:Sin:{d}30', 'ana[to]PRP:ana:a-na', 'ana[to]PRP:ana:ana', 'libbu[interior]N:libbi:ŠA₃', 'libbu[interior]N:libbi:ŠA₃-bi', 'muhhu[skull]N:muhhi:UGU', 'muhhu[skull]N:muhhi:UGU-hi', 'u[and]CNJ:u:u', 'u[and]CNJ:u:u₃', 'ša[of]DET:ša:ša', 'ša[of]DET:ša:ša₂', 'šulmu[completeness]N:šulmu:DI-mu', 'šulmu[completeness]N:šulmu:šul-mu', 'šarru[king]N:šarru:LUGAL', 'šarru[king]N:šarru:MAN']
1000. Evaluating: ['lā[not]MOD:lā:la', 'lā[not]MOD:lā:NU', 'muhhu[skull]N:muhhi:UGU', 'muhhu[skull]N:muhhi:UGU-hi', 'u[and]CNJ:u:u', 'u[and]CNJ:u:u₃', 'ša[of]DET:ša:ša', 'ša[of]DET:ša:ša₂', '

1019. Evaluating: ['Sin[1]DN:Sin:30', 'Sin[1]DN:Sin:{d}30', 'ana[to]PRP:ana:a-na', 'ana[to]PRP:ana:ana', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:NU', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u', 'muhhu[skull]N:muhhi:UGU', 'muhhu[skull]N:muhhi:UGU-hi', 'u[and]CNJ:u:u', 'u[and]CNJ:u:u₃', 'ša[of]DET:ša:ša', 'ša[of]DET:ša:ša₂', 'šulmu[completeness]N:šulmu:DI-mu', 'šulmu[completeness]N:šulmu:šul-mu', 'šarru[king]N:šarru:LUGAL', 'šarru[king]N:šarru:MAN']
1020. Evaluating: ['libbu[interior]N:libbi:ŠA₃', 'libbu[interior]N:libbi:ŠA₃-bi', 'lā[not]MOD:lā:la', 'lā[not]MOD:lā:NU', 'lū[may]MOD:lū:lu', 'lū[may]MOD:lū:lu-u', 'muhhu[skull]N:muhhi:UGU', 'muhhu[skull]N:muhhi:UGU-hi', 'u[and]CNJ:u:u', 'u[and]CNJ:u:u₃', 'ša[of]DET:ša:ša', 'ša[of]DET:ša:ša₂', 'šulmu[completeness]N:šulmu:DI-mu', 'šulmu[completeness]N:šulmu:šul-mu', 'šarru[king]N:šarru:LUGAL', 'šarru[king]N:šarru:MAN']
1021. Evaluating: ['Sin[1]DN:Sin:30', 'Sin[1]DN:Sin:{d}30', 'libbu[interior]N:libbi:ŠA₃', 'libbu[interior]N:libbi:ŠA₃-bi', 'lā[not]MO

Unnamed: 0,class_type,feature_type,group,group_len,num_clusters,purity_class,purity_random,error_sub,error_div
0,ancient_author,ortho_word_las,"[Sin[1]DN:Sin:30, Sin[1]DN:Sin:{d}30]",2,100,0.13000,0.10125,0.02875,1.283951
1,ancient_author,ortho_word_las,"[ana[to]PRP:ana:a-na, ana[to]PRP:ana:ana]",2,100,0.14375,0.11000,0.03375,1.306818
2,ancient_author,ortho_word_las,"[Sin[1]DN:Sin:30, Sin[1]DN:Sin:{d}30, ana[to]P...",4,100,0.22750,0.17375,0.05375,1.309353
3,ancient_author,ortho_word_las,"[libbu[interior]N:libbi:ŠA₃, libbu[interior]N:...",2,100,0.11250,0.10125,0.01125,1.111111
4,ancient_author,ortho_word_las,"[Sin[1]DN:Sin:30, Sin[1]DN:Sin:{d}30, libbu[in...",4,100,0.18625,0.14875,0.03750,1.252101
5,ancient_author,ortho_word_las,"[ana[to]PRP:ana:a-na, ana[to]PRP:ana:ana, libb...",4,100,0.19500,0.16000,0.03500,1.218750
6,ancient_author,ortho_word_las,"[Sin[1]DN:Sin:30, Sin[1]DN:Sin:{d}30, ana[to]P...",6,100,0.25750,0.20250,0.05500,1.271605
7,ancient_author,ortho_word_las,"[lā[not]MOD:lā:la, lā[not]MOD:lā:NU]",2,100,0.09875,0.10250,-0.00375,0.963415
8,ancient_author,ortho_word_las,"[Sin[1]DN:Sin:30, Sin[1]DN:Sin:{d}30, lā[not]M...",4,100,0.17125,0.13625,0.03500,1.256881
9,ancient_author,ortho_word_las,"[ana[to]PRP:ana:a-na, ana[to]PRP:ana:ana, lā[n...",4,100,0.17750,0.15375,0.02375,1.154472


text_id
P313453    1
P313480    2
P313814    1
P314321    1
P334113    3
P334158    1
P334179    6
P334201    1
P334360    1
P334588    1
P334610    1
P334729    4
P334831    1
X900004    2
X900012    1
Name: cf, dtype: int64