In [58]:
import requests
import pandas as pd
import numpy as np
import plotly.express as px

from bs4 import BeautifulSoup, NavigableString
from collections import defaultdict

import re
import os

In [59]:
# taken directly from Dr. Crane's 'TreebankCount.ipynb'
tenses = {'p':'present','f':'future', 'i':'imperfect','a':'aorist','r':'perfect','l':'pluperfect','t':'future perfect'}
voices = {'a':'active','m':'middle','p':'passive', 'i':'imperfect','e':'middle-passive'}
moods = {'i':'indicative','s':'subjunctive', 'o':'optative','m':'imperative','p':'participle','n':'infinitive'}

authinfo = {}
authinfo['tlg0011'] = 'Sophocles,-497,-406,poetry,drama'
authinfo['tlg1220'] = 'Batrachomyomachia,-100,-1,poetry,hexameter'
authinfo['tlg0013'] = 'Homeric Hymns,-650,-450,poetry,hexameter'
authinfo['tlg0020'] = 'Hesiod,-750,-650,poetry,hexameter'
authinfo['tlg0026'] = 'Aeschines,-389,-314,prose,orator'
authinfo['tlg0058'] = 'Aeneas Tacticus,301,400,prose,misc'
authinfo['tlg0096'] = 'Aesop,-620,-564,prose,misc'
authinfo['tlg0085'] = 'Aeschylus,-525c,-455c,poetry,drama'
authinfo['tlg0028'] = 'Antiphon,-490,-411,prose,orator'
authinfo['tlg0551'] = 'Appian,-490,-411,prose,history'
authinfo['tlg0086'] = 'Aristotle,-384,-322,prose,philosophy'
authinfo['tlg0019'] = 'Aristophanes,-446,-386,poetry,drama'
authinfo['tlg0008'] = 'Athenaeus,170,223,prose,misc'
authinfo['tlg0554'] = 'Chariton,101,200,prose,misc'
authinfo['tlg0041'] = 'Chionis Epistulae,1,200,prose,misc'
authinfo['tlg0627'] = 'Corpus Hippocraticum,-450,-350,prose,misc'
authinfo['tlg0014'] = 'Demosthenes,-384,-322,prose,orator'
authinfo['tlg0060'] = 'Diodorus Siculus,-90c,-30c,prose,history'
authinfo['tlg0081'] = 'Dionysius of Halicarnassus,-60c,-7c,prose,history'
authinfo['tlg0557'] = 'Epictetus,50,135,prose,philosopy'
authinfo['tlg0537'] = 'Epicurus,-341,-270,prose,philosophy'
authinfo['tlg0343'] = 'Ezechiel the Tragic Poet,-200,-101,poetry,drama'
authinfo['tlg0006'] = 'Euripides,-480c,-406c,poetry,drama'
authinfo['tlg0016'] = 'Herodotus,-484c,-425c,prose,history'
authinfo['tlg0559'] = 'Heron of Alexandria,10,70,prose,misc'
authinfo['tlg0010'] = 'Isocrates,-436,-338,prose,orator'
authinfo['tlg0526'] = 'Josephus,37,100c,prose,history'
authinfo['tlg2003'] = 'Julian the Apostate,331,363,prose,misc'
authinfo['tlg0561'] = 'Longus,101,200,prose,misc'
authinfo['tlg0061'] = 'Pseudo-Lucian,201,400,prose,misc'
authinfo['tlg0062'] = 'Lucian,125,180,prose,misc'
authinfo['tlg0540'] = 'Lysias,-445c,-380c,prose,orator'
authinfo['tlg0541'] = 'Menander,-342,-291,poetry,drama'
authinfo['tlg0255'] = 'Mimnermus,-650,-600,poetry,lyreleg'
authinfo['tlgX208'] = 'Paeanius,301,400,prose,misc'
authinfo['tlg0585'] = 'Phlegon,151,200,prose,misc'
authinfo['tlg0059'] = 'Plato,-428c,-347c,prose,philosophy'
authinfo['tlg0007'] = 'Plutarch,46,119,prose,history'
authinfo['tlg0543'] = 'Polybius,-200,-118,prose,history'
authinfo['tlg4029'] = 'Procopius,500,565,prose,history'
authinfo['tlg0009'] = 'Sappho,-630,-570,poetry,lyreleg'
authinfo['tlg0260'] = 'Semonides,-700,-601,poetry,lyreleg'
authinfo['tlg0527'] = 'Septuagint,-250,-100,prose,bible'
authinfo['tlg0544'] = 'Sextus Empiricus,150,250,prose,philosophy'
authinfo['tlg0032'] = 'Xenophon,-430c,-354,prose,history'
authinfo['tlg0005'] = 'Theocritus,-300,-255,poetry,hexameter'
authinfo['tlg0003'] = 'Thucydides,-460c,-400c,prose,history'
authinfo['tlg0093'] = 'Theophrastus,-371,-287,prose,philosophy'
authinfo['tlg0012'] = 'Homer,-775c,-700c,poetry,hexameter'
authinfo['tlg0031'] = 'New Testament,80,100,prose,bible'
authinfo['tlg3143'] = 'Georgius Sphrantzes,1480,1050,prose,history'

In [60]:
# change ROOT to local root
#ROOT = "c:\\Users\\bella\\OneDrive\\Documents"
ROOT = "/Users/bellahwang/Documents"

LOCALPATH = os.path.join(ROOT, 'GitHub')
GORPATH = os.path.join(LOCALPATH, 'gorman-trees', 'public', 'xml')
PEDPATH = os.path.join(LOCALPATH, 'pedalion-trees', 'public', 'xml')
MAMPATH = os.path.join(LOCALPATH, 'gAGDT', 'data', 'xml')
PROPATH = os.path.join(LOCALPATH, 'proiel-treebank')

In [61]:
# taken directly from Dr. Crane's 'TreebankCount.ipynb'
def addfiles(dirname,flist):
    for foo in sorted(os.listdir(dirname)):
        if(re.search('proiel', dirname)):
            if(not re.search('(chron|greek-nt|hdt)\.xml', foo)):
                continue
        elif(not re.search('\.xml$', foo)):
            continue
        newpath = os.path.join(dirname, foo)
        flist.append(newpath)
    return(flist)

def cleanFiles(FILENAME):
    with open(FILENAME, 'r', encoding="utf-8") as f:
        f = re.sub('λέγω3','λέγω', str(f))
        f = re.sub('part-of-speech="(.)." morphology="(........).."','postag="\g<1>\g<2>"', str(f))
        f = re.sub("(word|id|form|lemma|postag|relation|head)='([^']+)'",'\g<1>="\g<2>"', str(f))

In [62]:
searchfiles = []
searchfiles = addfiles(GORPATH, searchfiles)
searchfiles = addfiles(PEDPATH, searchfiles)
searchfiles = addfiles(MAMPATH, searchfiles)
searchfiles = addfiles(PROPATH, searchfiles)

for i in searchfiles:
    cleanFiles(i)

In [98]:
authList = defaultdict(list)
PostagExcept = ['""', '_', '-', 'c']

def findTLG(FILENAME):
    authTLG = 'dummy'
    returnList = []
    with open(FILENAME, 'r', encoding="utf-8") as f:
        soup = BeautifulSoup(f, "xml")
        
        # for proiel trees
        if (re.search("proiel", FILENAME)):
            if (re.search("greek-nt\.xml", FILENAME)):
                authTLG = 'tlg0031'
            elif (re.search("chron\.xml", FILENAME)):
                authTLG = 'tlg3143'
            elif (re.search("hdt\.xml", FILENAME)):
                authTLG = 'tlg0016'
                
            for token in soup('token'):
                if token.has_attr('part-of-speech'):
                    pos   = token['part-of-speech']
                    morph = token['morphology']
                    tense = morph[2]
                    mood  = morph[3]
                    voice = morph[4]
                    if (pos == 'V-'):
                        wordList = []
                        wordList.append(authTLG)
                        wordList.append(token['form'])
                        wordList.append(token['lemma'])
                        wordList.append(tense)
                        wordList.append(mood)
                        wordList.append(voice)
                        wordList.append(tense + mood + voice)
                        wordList.append(morph)
                        returnList.append(wordList)
                
        # for gorman, pedalion, and gAGDT trees
        else:
            if(re.search("papyri", FILENAME)):
                authTLG = 'papyri'
                for word in soup('word'):    
                    # find form, lemma, postag
                    if word.has_attr('postag'):
                        postag = word['postag']
                        if not postag in PostagExcept and postag:
                            #print(postag)
                            pos    = postag[0]
                            #person = postag[1]
                            #number = postag[2]
                            tense  = postag[3]
                            mood   = postag[4]
                            voice  = postag[5]
                            #gender = postag[6]
                            #case   = postag[7]
                            if (pos == 'v'):
                                wordList = []
                                wordList.append(authTLG)
                                wordList.append(word['form'])
                                wordList.append(word['lemma'])
                                wordList.append(tense)
                                wordList.append(mood)
                                wordList.append(voice)
                                wordList.append(tense + mood + voice)
                                wordList.append(postag)
                                returnList.append(wordList)
            elif(re.search('example-sentences', FILENAME)):
                return
            else:
                for sentence in soup('sentence'):
                    docID = sentence['document_id']
                    # find author ID
                    if ("urn:cts:greekLit:" in docID):
                        docIDList = docID.split(':')
                        if ("tlg" in docIDList[3]):
                            authTLG = docIDList[3].split('.')[0]
                        else:
                            authTLG = docIDList[-1].split('.')[0]
                    elif ("Perseus:text:" in docID):
                        authTLG = 'tlg0008'
                    elif (re.search(r'....-...', docID)):
                        authTLG = 'tlg' + docID.split('-')[0]
                    elif ("NT" in docID):
                        authTLG = 'tlg0031'
                    elif ("Ps-Luc" in docID):
                        authTLG = 'tlg0061'
                    elif ("Paean" in docID):
                        authTLG = 'tlgX208'
                    elif ("Genesis" in docID):
                        authTLG = 'tlg0527'
                    elif ("Chilia" in docID):
                        authTLG = 'Chilia'
                    elif ("Pedalion" in docID):
                        authTLG = 'Pedalion'
                    elif ("Mimn" in docID):
                        authTLG = 'tlg0255'
                    elif ("0260" in docID):
                        authTLG = 'tlg0260'
                    elif ("0005" in docID):
                        authTLG = 'tlg0005'
                    else:
                        print(docID)
                        
                    for word in sentence('word'):    
                        # find form, lemma, postag
                        if word.has_attr('postag'):
                            postag = word['postag']
                            if not postag in PostagExcept and postag:
                                #print(postag)
                                pos    = postag[0]
                                #person = postag[1]
                                #number = postag[2]
                                tense  = postag[3]
                                mood   = postag[4]
                                voice  = postag[5]
                                #gender = postag[6]
                                #case   = postag[7]
                                if (pos == 'v'):
                                    wordList = []
                                    wordList.append(authTLG)
                                    wordList.append(word['form'])
                                    wordList.append(word['lemma'])
                                    wordList.append(tense)
                                    wordList.append(mood)
                                    wordList.append(voice)
                                    wordList.append(tense + mood + voice)
                                    wordList.append(postag)
                                    returnList.append(wordList)
                                    #print(wordList)
                                    #authList[authTLG].append(wordList)
    return returnList

In [99]:
searchfiles = []
searchfiles = addfiles(GORPATH, searchfiles)
searchfiles = addfiles(PEDPATH, searchfiles)
searchfiles = addfiles(MAMPATH, searchfiles)
searchfiles = addfiles(PROPATH, searchfiles)
#FILENAME = os.path.join(GORPATH, 'dem-59-neaira-2019.xml')


DFList = []
for i in searchfiles:
#print(i)
    data = findTLG(i)
    df = pd.DataFrame(data, columns = ['Author', 'Form', 'Lemma', 'Tense', 'Mood', 'Voice', 'Tense+Mood+Voice','POStag'])
    DFList.append(df)

In [100]:
master_df = pd.concat(DFList, ignore_index=True)
print(master_df)

         Author          Form         Lemma Tense Mood Voice Tense+Mood+Voice  \
0       tlg0026    γραψάμενος         γράφω     a    p     m              apm   
1       tlg0026       λυπήσας         λυπέω     a    p     a              apa   
2       tlg0026        νομίζω        νομίζω     p    i     a              pia   
3       tlg0026    παρεσχηκώς        παρέχω     r    p     a              rpa   
4       tlg0026          ὁρῶν          ὁράω     p    p     a              ppa   
...         ...           ...           ...   ...  ...   ...              ...   
273579  tlg0016     ἐκφυγόντα       ἐκφεύγω     a    p     a              apa   
273580  tlg0016  περιγενέσθαι  περιγίγνομαι     a    n     m              anm   
273581  tlg0016    ἐμηχανῶντο    μηχανάομαι     i    i     m              iim   
273582  tlg0016   περιέπεμπον     περιπέμπω     i    i     a              iia   
273583  tlg0016     ἐποιεῦντο         ποιέω     i    i     m              iim   

            POStag  
0     

In [74]:
#general verb counts
master_df['Tense+Mood+Voice'].value_counts()

pia    30700
aia    28481
ppa    28343
iia    23458
apa    17744
       ...  
-ie        1
paa        1
po-        1
a--        1
tna        1
Name: Tense+Mood+Voice, Length: 142, dtype: int64

In [88]:
# verb counts
master_df.groupby("Author", sort=False)["Tense+Mood+Voice"].count()

Author
tlg0026      2914
tlg0028      3374
tlg0551      4476
tlg0086      3533
tlg0008     14913
tlg0014     12711
tlg0060      4443
tlg0081      5122
tlg0016     23269
tlg0526      4357
tlg0540      4976
tlg0059      3347
tlg0007      6922
tlg0543     17331
tlg0032     20591
tlg0003     10619
tlg0019      3274
tlg0058      1357
tlg0096      1611
tlg1220       428
tlg0554      1174
tlg0031     27140
tlg0085      7997
tlg0012     41455
tlg0011      9137
tlg0061      2170
tlgX208       932
tlg0527      2766
Chilia        542
tlg0541      1577
tlg0006      1848
tlg0062      1674
Pedalion      664
tlg0020      2772
tlg0041       994
tlg0557      1276
tlg0537       257
tlg0627         5
tlg0343       317
tlg0559      1683
tlg0010       101
tlg2003       257
tlg0561       116
tlg0255        28
tlg0585      1011
tlg4029      4899
tlg0009       436
tlg0260       134
tlg0544      2352
tlg0005        42
tlg0093      1697
tlg0013       663
tlg3143      3496
Name: Tense+Mood+Voice, dtype: int64

In [93]:
# verb counts based on tense+mood+voice
verbFormCt = master_df.groupby(["Author", "Tense+Mood+Voice"], sort=False)["Tense+Mood+Voice"].size().reset_index(name='count')
verbFormCt

Unnamed: 0,Author,Tense+Mood+Voice,count
0,tlg0026,apm,39
1,tlg0026,apa,132
2,tlg0026,pia,400
3,tlg0026,rpa,90
4,tlg0026,ppa,278
...,...,...,...
2796,tlg0016,tip,1
2797,tlg0016,iom,1
2798,tlg0016,pmp,1
2799,tlg0016,rsm,1


In [95]:
fig1 = px.treemap(verbFormCt, path=[px.Constant('Verb Tenses'), 'Author', 'Tense+Mood+Voice'], values='count')
fig1.show()

In [96]:
fig2 = px.icicle(verbFormCt, path=[px.Constant('Verb Tenses'), 'Author', 'Tense+Mood+Voice'], values='count')
fig2.show()

In [97]:
fig3 = px.sunburst(verbFormCt, path=[px.Constant('Verb Tenses'), 'Author', 'Tense+Mood+Voice'], values='count')
fig3.show()

In [40]:
formcnt = 0
formlist = {}
lemmacnt = 0
lemmalist = {}
nounlist = {}
authorwordcnt = {}
prosewordcnt = 0
poetrywordcnt = 0
verblist = {}
verbforms = []
genrelist = []
persids = {}
poetrywordcnt = 0
prosewordcnt = 0
authorlemmas = {}

In [41]:
def Forms(FILENAME):
    
    prosevpoetry =''
    curgenre = ''
    curformcnt = 0
    curlemmas = {}
    curauthid = ''
    
    if(re.search("papyri", FILENAME)):
        curauthid = 'tlg0541'
        prosevpoetry = 'prose'
        curgenre = 'misc'
        if(not curauthid in persids):
            authorwordcnt[curauthid] = 0
        persids[curauthid] = FILENAME
    elif(re.search("greek-nt\.xml", FILENAME)):
        curauthid = 'tlg0031'
        prosevpoetry = 'prose'
        curgenre = 'bible'
        if(not curauthid in persids):
            authorwordcnt[curauthid] = 0
        persids[curauthid] = FILENAME
    elif(re.search("chron\.xml", FILENAME)):
        curauthid = 'tlg3143'
        prosevpoetry = 'prose'
        curgenre = 'history'
        if(not curauthid in persids):
            authorwordcnt[curauthid] = 0
        persids[curauthid] = FILENAME
    elif(re.search("proiel-treebank\/hdt\.xml", FILENAME)):
        curauthid = 'tlg0016'
        prosevpoetry = 'prose'
        curgenre = 'history'
        if(not curauthid in persids):
            authorwordcnt[curauthid] = 0
        persids[curauthid] = FILENAME
    elif(re.search('example-sentences', FILENAME)):
        return
    
    with open(FILENAME, 'r', encoding="utf-8") as f:
        soup = BeautifulSoup(f, "xml")
        
        curtype = ''
        postag = ''
        if(re.search('#', str(f))):
            return
        elif(re.search("proiel-treebank\/hdt\.xml", str(f)) and re.search('citation-part="1.', str(f))):
            print('skipping', str(f))
            return
            
        m = re.search('postag="(.)', str(f))
        if(m):
            curtype = m[1].lower()
            if(curtype == 'u'):
                return

        if(re.search("ref='Leuven\|Paean", str(f))):
            curauthid = 'tlgX208'
            if(not curauthid in persids):
                authorwordcnt[curauthid] = 0
                persids[curauthid] = FILENAME
        elif(re.search("ref='Leuven\|Mimn", str(f))):
            curauthid = 'tlg0255'
            if(not curauthid in persids):
                authorwordcnt[curauthid] = 0
                persids[curauthid] = FILENAME
        elif(re.search("ref='Leuven\|0260", str(f))):
            curauthid = 'tlg0260'
            if(not curauthid in persids):
                authorwordcnt[curauthid] = 0
                persids[curauthid] = FILENAME
        elif(re.search("ref='Leuven\|0005", str(f))):
            curauthid = 'tlg0005'
            if(not curauthid in persids):
                authorwordcnt[curauthid] = 0
                persids[curauthid] = FILENAME
        elif(re.search("ref='Leuven\|Ps-Luc", str(f))):
            curauthid = 'tlg0061'
            if(not curauthid in persids):
                authorwordcnt[curauthid] = 0
                persids[curauthid] = FILENAME
        elif(re.search("ref='Leuven\|Genesis", str(f))):
            curauthid = 'tlg0527'
            if(not curauthid in persids):
                authorwordcnt[curauthid] = 0
                persids[curauthid] = FILENAME
        
        if(curauthid in authinfo):
            res = authinfo[curauthid].split(',')
            prosevpoetry = res[3]
            curgenre = res[4]

        m = re.search('document_id="([0-9]+)\-([0-9]+)', str(f))
        if(m):
            curdocid = 'urn:cts:greekLit:tlg' + m[1] + '.' +'tlg' + m[1]
            m = re.search('(tlg[0-9][0-9][0-9][0-9])', curdocid)
            if(m):
                curauthid = m[1]
            else:
                print('badid', curdocid)
                curauthid = 'noid'
            if(not curauthid in persids):
                print(curauthid, FILENAME)
                authorwordcnt[curauthid] = 0
            persids[curauthid] = FILENAME 
            
        m = re.search('document_id="(http://perseids.org[^"]+|http://data.perseus.org[^"]+|http://perseids.org/annotsrc/urn:cts:greekLit:[^"]+|Perseus[^"]+|urn:[^"]+)"', str(f))
        if(m):
            curdocid = m[1]
            m = re.search('(tlg[0-9][0-9][0-9][0-9])', curdocid)
            if(m):
                curauthid = m[1]
            elif(curdocid == 'Perseus:text:2008.01.0405'):
                curauthid = 'tlg0008'
            else:
                print('badid3', curdocid)
                curauthid = 'noid'
           
            if(not curauthid in persids):
                print(curauthid, FILENAME)
                authorwordcnt[curauthid] = 0
                persids[curauthid] = FILENAME
            elif(curauthid == 'noid'):
                prosevpoetry = "na"
                curgenre = 'misc'
            else:
                res = authinfo[curauthid].split(',')
                prosevpoetry = res[3]
                curgenre = res[4]

            
        m = re.search('postag="([^"]+)"', str(f))
        if(m):
            postag = m[1].lower()
            
        m = re.search('form="([^"]+)"', str(f))
        if(m):
            formcnt = formcnt + 1
            curformcnt = curformcnt + 1
            if(not curauthid):
                print('noauthid', FILENAME, f)
            else:
                authorwordcnt[curauthid] = authorwordcnt[curauthid] + 1
            curform = m[1]
            if(curform in formlist):
                formlist[curform] = formlist[curform] + 1
            else:
                formlist[curform] = 1
        
        m = re.search('lemma="([^"]+)"', str(f))
        if(m):
            if(not curtype):
                print('notype', f ,end='')
                return
            lemmacnt = lemmacnt + 1
            curlemma = m[1]
            
            if(curtype =='n'):
                if(not curlemma in nounlist):
                    nounlist[curlemma] = 1
                else:
                    nounlist[curlemma] = nounlist[curlemma] + 1
            if(curtype =='v' ):
                m = re.search('^v(.)(.)(.)(.)(.)', postag)
                if(m):
                    curperson = m[1]
                    curnumber = m[2]
                    if(m[3] in tenses):
                        curtense = tenses[m[3]]
                    else:
                        curtense = m[1]
                    if(m[4] in moods):
                        curmood = moods[m[4]]
                    else:
                        curmood = m[4]
                    if(m[5] in voices):
                        curvoice = voices[m[5]]
                    else:
                        curvoice = m[5]
                else:
                    curnumber = 'unk'
                    curperson = 'unk'
                    curtense = 'unk'
                    curvoice = 'unk'
                    curmood = 'unk'
                verbforms.append([curform, curlemma, curauthid, prosevpoetry, curgenre, curperson, curnumber, curtense, curvoice, curmood, 1])
                if( not curlemma in verblist):
                    verblist[curlemma] = 1
                else:
                    verblist[curlemma] = verblist[curlemma] + 1
            
            if(curauthid):
                if(not curauthid in authorlemmas):
                    authorlemmas[curauthid] = {}
                if(curlemma in authorlemmas[curauthid]):
                    authorlemmas[curauthid][curlemma] = authorlemmas[curauthid][curlemma] + 1
                else:
                    authorlemmas[curauthid][curlemma] = 1
                    
            if(curlemma in lemmalist):
                lemmalist[curlemma] = lemmalist[curlemma] + 1
            else:
                lemmalist[curlemma] = 1
            if(curlemma in curlemmas):
                curlemmas[curlemma] = curlemmas[curlemma] + 1
            else:
                curlemmas[curlemma] = 1
        
    print(FILENAME, prosevpoetry, curformcnt, len(curlemmas), len(lemmalist))

In [42]:
for i in searchfiles:
    Forms(i)

c:\Users\bella\OneDrive\Documents\GitHub\gorman-trees\public\xml\aeschines-1-1-50-bu1.xml  0 0 0
c:\Users\bella\OneDrive\Documents\GitHub\gorman-trees\public\xml\aeschines-1-101-150-bu1.xml  0 0 0
c:\Users\bella\OneDrive\Documents\GitHub\gorman-trees\public\xml\aeschines-1-151-196-bu1.xml  0 0 0
c:\Users\bella\OneDrive\Documents\GitHub\gorman-trees\public\xml\aeschines-1-51-100-bu1.xml  0 0 0
c:\Users\bella\OneDrive\Documents\GitHub\gorman-trees\public\xml\antiphon-1-bu2.xml  0 0 0
c:\Users\bella\OneDrive\Documents\GitHub\gorman-trees\public\xml\antiphon-2-bu2.xml  0 0 0
c:\Users\bella\OneDrive\Documents\GitHub\gorman-trees\public\xml\antiphon-5-bu2.xml  0 0 0
c:\Users\bella\OneDrive\Documents\GitHub\gorman-trees\public\xml\antiphon-6-bu2.xml  0 0 0
c:\Users\bella\OneDrive\Documents\GitHub\gorman-trees\public\xml\appian-bc-1-0-1-4-bu1.xml  0 0 0
c:\Users\bella\OneDrive\Documents\GitHub\gorman-trees\public\xml\appian-bc-1-11-14-bu1.xml  0 0 0
c:\Users\bella\OneDrive\Documents\GitHub\gor

c:\Users\bella\OneDrive\Documents\GitHub\gorman-trees\public\xml\josephus-bj-1-16-20-bu1.xml  0 0 0
c:\Users\bella\OneDrive\Documents\GitHub\gorman-trees\public\xml\josephus-bj-1-21-25-bu1.xml  0 0 0
c:\Users\bella\OneDrive\Documents\GitHub\gorman-trees\public\xml\josephus-bj-1-3-5-bu2.xml  0 0 0
c:\Users\bella\OneDrive\Documents\GitHub\gorman-trees\public\xml\josephus-bj-1-6-10-bu1.xml  0 0 0
c:\Users\bella\OneDrive\Documents\GitHub\gorman-trees\public\xml\lysias-1-bu1.xml  0 0 0
c:\Users\bella\OneDrive\Documents\GitHub\gorman-trees\public\xml\lysias-12-bu1.xml  0 0 0
c:\Users\bella\OneDrive\Documents\GitHub\gorman-trees\public\xml\lysias-13-bu1.xml  0 0 0
c:\Users\bella\OneDrive\Documents\GitHub\gorman-trees\public\xml\lysias-14-bu1.xml  0 0 0
c:\Users\bella\OneDrive\Documents\GitHub\gorman-trees\public\xml\lysias-15.xml  0 0 0
c:\Users\bella\OneDrive\Documents\GitHub\gorman-trees\public\xml\lysias-19-bu1.xml  0 0 0
c:\Users\bella\OneDrive\Documents\GitHub\gorman-trees\public\xml\lys

c:\Users\bella\OneDrive\Documents\GitHub\pedalion-trees\public\xml\heron.xml  0 0 0
c:\Users\bella\OneDrive\Documents\GitHub\pedalion-trees\public\xml\iso.xml  0 0 0
c:\Users\bella\OneDrive\Documents\GitHub\pedalion-trees\public\xml\julian.xml  0 0 0
c:\Users\bella\OneDrive\Documents\GitHub\pedalion-trees\public\xml\longus.xml  0 0 0
c:\Users\bella\OneDrive\Documents\GitHub\pedalion-trees\public\xml\lucian_lis.xml  0 0 0
c:\Users\bella\OneDrive\Documents\GitHub\pedalion-trees\public\xml\lucian_prometheus.xml  0 0 0
c:\Users\bella\OneDrive\Documents\GitHub\pedalion-trees\public\xml\lucian_symposion.xml  0 0 0
c:\Users\bella\OneDrive\Documents\GitHub\pedalion-trees\public\xml\lysias_or24.xml  0 0 0
c:\Users\bella\OneDrive\Documents\GitHub\pedalion-trees\public\xml\menander_dyskolos.xml  0 0 0
c:\Users\bella\OneDrive\Documents\GitHub\pedalion-trees\public\xml\mimn.xml  0 0 0
c:\Users\bella\OneDrive\Documents\GitHub\pedalion-trees\public\xml\paean.xml  0 0 0
c:\Users\bella\OneDrive\Documen

In [43]:
#FILENAME = os.path.join(PEDPATH, 'chion.xml')
FILENAME = os.path.join(GORPATH, 'dem-59-neaira-2019.xml')
#FILENAME = os.path.join(MAMPATH, 'tlg0003.tlg001.perseus-grc1.1.tb.xml')

#list of exceptions
AuthorExcept = ['N/A']
PostagExcept = ['""', '_', '-', 'c']
            

def returnTLG(listOfCite):
    for i in listOfCite:
        if i.startswith('tlg'):
            return i
    return "N/A"

def VerbTenses(FILENAME):
    with open(FILENAME, 'r', encoding="utf-8") as f:
        soup = BeautifulSoup(f, "xml")
        for sentence in soup('sentence'):
            for word in sentence('word'):
                if word.has_attr('postag'):
                    postag = word['postag']
                    #print(postag)
                    if not postag in PostagExcept and postag:
                        #print(postag)
                        pos    = postag[0]
                        person = postag[1]
                        number = postag[2]
                        tense  = postag[3]
                        mood   = postag[4]
                        voice  = postag[5]
                        #gender = postag[6]
                        #case   = postag[7]

                        if (pos == 'v'):
                            category = tense + mood + voice
                            verbForms[category] += 1
        print(verbForms)
        return verbForms

In [58]:


#VerbTenses(FILENAME)

for i in searchfiles:
    VerbTenses(i)

Aeschines
urn:cts:greekLit:tlg0026.tlg001.perseus-grc1
c:\Users\bella\OneDrive\Documents\GitHub\gorman-trees\public\xml\aeschines-1-1-50-bu1.xml
defaultdict(<class 'int'>, {'apm': 7, 'apa': 25, 'pia': 119, 'rpa': 30, 'ppa': 66, 'ppe': 36, 'fia': 9, 'aim': 15, 'pna': 110, 'ana': 19, 'aia': 35, 'ria': 18, 'pne': 13, 'fip': 1, 'iia': 22, 'rne': 1, 'fim': 18, 'anm': 10, 'pie': 40, 'rpe': 17, 'rma': 2, 'pse': 11, 'psa': 31, 'asa': 16, 'ame': 5, 'anp': 3, 'fpe': 1, 'rsa': 4, 'aie': 4, 'app': 8, 'fnm': 3, 'pma': 25, 'pme': 6, 'asp': 8, 'pim': 2, 'rie': 4, 'ase': 3, 'ama': 11, 'amp': 1, 'aoa': 5, 'aip': 4, 'fpm': 2, 'asm': 5, 'iie': 10, 'fna': 3, 'ape': 2, 'rnm': 1, 'aom': 2, 'fie': 1, 'lia': 1, 'iim': 1, 'ane': 2})
Aeschines
urn:cts:greekLit:tlg0026.tlg001.perseus-grc1
c:\Users\bella\OneDrive\Documents\GitHub\gorman-trees\public\xml\aeschines-1-101-150-bu1.xml
defaultdict(<class 'int'>, {'lie': 1, 'ria': 19, 'fia': 23, 'app': 7, 'aim': 15, 'iia': 32, 'rpe': 11, 'aia': 51, 'pia': 110, 'ppa': 6

Aristotle
urn:cts:greekLit:tlg0086.tlg035.perseus-grc1:1
c:\Users\bella\OneDrive\Documents\GitHub\gorman-trees\public\xml\aristotle-politics-book-1-bu1.xml
defaultdict(<class 'int'>, {'pia': 548, 'ppa': 130, 'rpa': 5, 'pna': 196, 'pie': 69, 'ppe': 66, 'rsa': 1, 'psa': 8, 'fim': 14, 'rpe': 23, 'ana': 19, 'app': 18, 'aoa': 9, 'pne': 30, 'rie': 11, 'fom': 1, 'aia': 14, 'apa': 12, '___': 10, 'ria': 24, 'iie': 5, 'iia': 49, 'poa': 7, 'fia': 7, 'lia': 2, 'pma': 2, 'asa': 6, 'rna': 2, 'poe': 1, 'fnp': 1, 'aim': 6, 'ppm': 3, 'pnm': 2, 'aom': 2, 'rne': 1, 'anm': 7, 'fpa': 1, 'asm': 1, 'anp': 1, 'rme': 2, 'fpm': 3, 'pse': 6, 'apm': 8, 'aip': 3, 'pp-': 1, 'iim': 1, 'fip': 1, 'ase': 1})
Aristotle
urn:cts:greekLit:tlg0086.tlg035.perseus-grc1:2
c:\Users\bella\OneDrive\Documents\GitHub\gorman-trees\public\xml\aristotle-politics-book-2-bu2.xml
defaultdict(<class 'int'>, {'pie': 106, 'ana': 48, 'ppe': 73, 'pna': 415, 'iia': 107, 'ane': 7, 'pse': 16, 'pne': 88, 'pia': 563, 'rpe': 39, 'ppa': 217, 'asp': 

Athenaeus
urn:cts:greekLit:tlg0008.tlg001.perseus-grc1
c:\Users\bella\OneDrive\Documents\GitHub\gorman-trees\public\xml\athen12-70-81-2019.xml
defaultdict(<class 'int'>, {'pia': 101, 'ppe': 30, 'apa': 38, 'iia': 43, 'app': 8, 'pna': 37, 'ppa': 87, 'poa': 5, 'aoa': 3, 'aia': 26, 'aim': 13, 'ria': 9, 'ana': 9, 'rna': 5, 'apm': 13, 'poe': 2, 'pne': 16, 'rne': 3, 'rie': 2, 'iie': 15, 'aip': 2, 'anp': 6, 'asa': 4, '___': 22, 'rpe': 8, 'pie': 18, 'rpa': 5, 'lia': 3, 'ape': 3, 'lie': 1, 'ppm': 2, 'anm': 3, 'fna': 1, 'fom': 1, 'pma': 3, 'psa': 1, 'fim': 1, 'pse': 1, 'rsa': 1, 'aop': 2, 'iim': 1, 'fia': 8, 'pmm': 2})
Athenaeus
urn:cts:greekLit:tlg0008.tlg001.perseus-grc1
c:\Users\bella\OneDrive\Documents\GitHub\gorman-trees\public\xml\athen12-70-81-jan-15.xml
defaultdict(<class 'int'>, {'pia': 79, 'ppe': 31, 'apa': 38, 'iia': 40, 'app': 8, 'pna': 36, 'ppa': 85, 'poa': 5, 'aoa': 3, 'aia': 26, 'aim': 13, 'ria': 6, 'ana': 10, 'rna': 5, 'apm': 16, 'poe': 3, 'pne': 15, 'rne': 3, 'rie': 2, 'iie': 12,

Athenaeus
urn:cts:greekLit:tlg0008.tlg001.perseus-grc1
c:\Users\bella\OneDrive\Documents\GitHub\gorman-trees\public\xml\athen13-60-69-2019.xml
defaultdict(<class 'int'>, {'pia': 73, 'rna': 3, 'pne': 4, 'fnm': 3, 'ppe': 30, 'pna': 20, 'ppa': 53, 'iia': 38, 'iie': 4, 'fna': 1, 'asa': 2, 'aia': 39, 'lia': 3, 'pie': 5, 'ria': 7, 'aim': 8, 'apa': 30, 'asm': 3, 'aip': 9, 'ama': 3, 'pnm': 1, 'pme': 1, 'rie': 5, '___': 15, 'anm': 5, 'ana': 11, 'ppm': 1, 'rma': 1, 'fpa': 1, 'app': 14, 'poe': 1, 'pga': 2, 'rpa': 4, 'rpe': 7, 'apm': 7, 'aoa': 4, 'fpm': 1, 'psa': 2, 'fim': 4, 'ame': 1, 'ase': 1, 'rne': 1, 'fia': 2})
Athenaeus
urn:cts:greekLit:tlg0008.tlg001.perseus-grc1
c:\Users\bella\OneDrive\Documents\GitHub\gorman-trees\public\xml\athen13-60-69-jan-15.xml
defaultdict(<class 'int'>, {'pia': 72, 'rna': 3, 'pne': 4, 'fnm': 3, 'ppe': 30, 'pna': 20, 'ppa': 53, 'iia': 38, 'iie': 4, 'fna': 1, 'asa': 2, 'aia': 39, 'lia': 3, 'pie': 5, 'ria': 7, 'aim': 8, 'apa': 30, 'asm': 3, 'aip': 9, 'ama': 3, 'pnm': 1

Demosthenes
urn:cts:greekLit:tlg0014.tlg018.perseus-grc1
c:\Users\bella\OneDrive\Documents\GitHub\gorman-trees\public\xml\demosthenes-18-51-100-bu1.xml
defaultdict(<class 'int'>, {'pia': 66, 'aia': 52, 'ppa': 100, 'apa': 39, 'app': 11, 'aoa': 8, 'pie': 11, 'pna': 61, 'apm': 9, 'ama': 2, 'fia': 12, 'ane': 3, 'ana': 38, 'rpe': 18, 'rpa': 16, 'asa': 9, 'pma': 10, 'ppe': 27, 'poa': 3, 'pse': 7, 'pne': 13, 'psa': 9, 'fna': 1, 'fie': 2, 'fip': 1, 'ria': 19, 'pim': 3, 'iia': 59, 'fim': 5, 'rie': 16, 'aim': 12, 'aip': 11, 'anm': 13, 'fpm': 1, 'iim': 1, 'aom': 2, 'iie': 17, 'aie': 1, 'ape': 4, 'lia': 3, 'aop': 1, 'ppm': 3, 'asp': 4, 'anp': 6, 'fpa': 4, 'asm': 1, 'fnm': 3, 'rne': 6, 'lie': 3, 'rip': 2, 'ina': 1})
Demosthenes
urn:cts:greekLit:tlg0014.tlg004.perseus-grc1
c:\Users\bella\OneDrive\Documents\GitHub\gorman-trees\public\xml\demosthenes-4-phil1-bu1.xml
defaultdict(<class 'int'>, {'iie': 3, 'pna': 117, 'apa': 24, 'rpa': 8, 'aim': 4, 'iia': 24, 'app': 6, 'psa': 28, 'ria': 23, 'pia': 145, '

Dionysius of Halicarnassus
urn:cts:greekLit:tlg0081.tlg001.perseus-grc1
c:\Users\bella\OneDrive\Documents\GitHub\gorman-trees\public\xml\dion-hal-1-16-30-bu1.xml
defaultdict(<class 'int'>, {'pie': 32, 'anm': 15, 'apa': 53, 'fpm': 15, 'ppa': 82, 'iia': 71, 'apm': 36, 'app': 26, 'aoa': 8, 'pna': 41, 'rpa': 9, 'poa': 6, 'ppe': 55, 'aom': 1, 'iie': 16, 'aop': 1, 'pia': 72, 'aia': 61, 'ana': 23, 'aim': 23, 'fnp': 1, 'iim': 2, 'ppm': 1, 'aip': 18, 'rpe': 10, 'fpa': 2, 'pne': 11, 'pma': 2, 'ama': 1, 'ppp': 1, 'ria': 4, 'anp': 9, 'poe': 1, 'fna': 2, 'pp-': 1, 'rne': 3, 'ape': 1, 'psa': 1, 'rie': 6, 'ane': 3, 'rna': 1, 'fip': 1})
Dionysius of Halicarnassus
urn:cts:greekLit:tlg0081.tlg001.perseus-grc1
c:\Users\bella\OneDrive\Documents\GitHub\gorman-trees\public\xml\dion-hal-1-31-45-bu1.xml
defaultdict(<class 'int'>, {'pie': 46, 'pia': 80, 'apa': 45, 'iim': 1, 'ppe': 73, 'pna': 41, 'poa': 9, 'apm': 29, 'ppa': 65, 'aip': 8, 'app': 18, 'aia': 42, 'iia': 52, 'rpa': 9, 'iie': 14, 'rpe': 25, 'fna': 1,

Herodotus
urn:cts:greekLit:tlg0016.tlg001.perseus-grc1
c:\Users\bella\OneDrive\Documents\GitHub\gorman-trees\public\xml\hdt-1-40-59-bu2-2019.xml
defaultdict(<class 'int'>, {'pie': 22, 'pia': 39, 'ppa': 65, 'rpe': 10, 'pna': 27, 'apa': 42, 'apm': 16, 'psa': 3, 'aia': 45, 'iia': 40, 'pne': 11, 'anm': 5, 'ppm': 2, 'fim': 4, 'rpa': 11, 'fna': 5, 'pma': 2, 'aim': 10, 'app': 8, 'ppp': 1, 'fpa': 1, 'iie': 19, 'ppe': 17, 'poa': 7, 'roa': 1, 'ana': 8, 'aie': 1, 'fnm': 2, 'lia': 3, 'lim': 1, 'poe': 1, 'fpe': 1, 'fpm': 4, 'aop': 1, 'pse': 6, 'asp': 1, 'asa': 1, 'ria': 2, 'rie': 2, 'aip': 7, 'rne': 1, 'fne': 1, 'lie': 2, '---': 1, 'pom': 1, 'ane': 1, 'iip': 1, 'ape': 1, 'asm': 1, 'aoe': 1, 'anp': 1, 'rma': 1})
Herodotus
urn:cts:greekLit:tlg0016.tlg001.perseus-grc1
c:\Users\bella\OneDrive\Documents\GitHub\gorman-trees\public\xml\hdt-1-60-79-bu2-2019.xml
defaultdict(<class 'int'>, {'apa': 76, 'pia': 47, 'aia': 60, 'rpe': 14, 'ppa': 81, 'ppe': 33, 'iie': 36, 'poe': 4, 'pna': 31, 'apm': 37, 'pie': 17,

Lysias
urn:cts:greekLit:tlg0540.tlg019.perseus-grc1
c:\Users\bella\OneDrive\Documents\GitHub\gorman-trees\public\xml\lysias-19-bu1.xml
defaultdict(<class 'int'>, {'pia': 69, 'ase': 1, 'asa': 4, 'fia': 4, 'pna': 58, 'ppa': 70, 'fip': 3, 'ria': 12, 'pse': 1, 'iia': 57, 'fim': 9, 'ane': 6, 'ana': 36, 'ppe': 15, 'psm': 1, 'aim': 24, 'pie': 24, 'rpa': 21, 'rna': 8, 'apa': 24, 'aip': 17, 'apm': 9, 'app': 6, 'aia': 67, 'rie': 7, 'pnm': 3, 'poa': 14, 'rpe': 21, 'pim': 2, 'pme': 5, 'anm': 16, 'anp': 6, 'pne': 11, 'psa': 4, 'fpm': 1, 'fom': 1, 'iie': 15, 'aie': 1, 'iim': 2, 'fna': 4, 'pma': 10, 'aom': 4, 'fnm': 5, 'rne': 2, 'lie': 1, 'asm': 2, 'aoa': 5, 'poe': 2, 'lia': 1, 'aop': 2, 'ama': 1, 'rsa': 1, 'ppm': 1, 'fie': 1})
Lysias
urn:cts:greekLit:tlg0540.tlg023.perseus-grc1
c:\Users\bella\OneDrive\Documents\GitHub\gorman-trees\public\xml\lysias-23-bu1.xml
defaultdict(<class 'int'>, {'pna': 23, 'poe': 4, 'pia': 11, 'aia': 19, 'ppa': 23, 'fim': 7, 'ana': 7, 'iie': 8, 'apa': 12, 'aim': 11, 'poa': 1

Polybius
urn:cts:greekLit:tlg0543.tlg001.perseus-grc1
c:\Users\bella\OneDrive\Documents\GitHub\gorman-trees\public\xml\polybius-6-2-15-bu1.xml
defaultdict(<class 'int'>, {'pia': 132, 'fia': 6, 'apm': 14, 'pna': 97, 'aim': 8, 'rpe': 29, 'iia': 27, 'pie': 54, 'rna': 5, 'ppa': 98, 'ana': 23, 'app': 12, 'aia': 18, 'rpa': 32, 'pne': 35, 'ppe': 62, 'asm': 8, 'aoa': 12, 'apa': 30, 'rie': 6, 'ria': 10, 'ane': 1, 'poa': 6, 'fnm': 1, 'pim': 2, 'pse': 8, 'asp': 4, 'psa': 13, 'poe': 3, 'fna': 4, 'fim': 7, 'asa': 15, 'fpp': 2, 'tne': 1, 'pnm': 2, 'ppm': 6, 'fpm': 4, 'iie': 8, 'aom': 2, 'ape': 1, 'rne': 1, 'aip': 3, 'aie': 1, 'fip': 2, 'rme': 1, 'aop': 1, 'lie': 1, 'fpa': 3, 'anp': 1, 'ase': 1})
Polybius
urn:cts:greekLit:tlg0543.tlg001.perseus-grc2
c:\Users\bella\OneDrive\Documents\GitHub\gorman-trees\public\xml\polybius-6-31-45-bu1.xml
defaultdict(<class 'int'>, {'rpa': 16, 'rpe': 26, 'pie': 93, 'ppa': 79, 'ppe': 35, 'pia': 129, 'pna': 56, 'pne': 11, 'psa': 21, 'apa': 43, 'app': 21, 'iia': 14, 'rne

Xenophon
http://data.perseus.org/texts/urn:cts:greekLit:tlg0032.tlg015.perseus-grc1
c:\Users\bella\OneDrive\Documents\GitHub\gorman-trees\public\xml\ps-xen-ath-pol-bu2.xml
defaultdict(<class 'int'>, {'aim': 10, 'pia': 198, 'apm': 3, 'pna': 103, 'aia': 22, 'pie': 39, 'fia': 11, 'ppa': 75, 'ppe': 31, 'ana': 30, 'psa': 18, 'aoa': 11, 'iia': 33, 'iie': 5, 'apa': 12, 'poa': 3, 'poe': 2, 'pne': 32, 'fim': 15, 'app': 2, 'aop': 1, 'rna': 2, 'lia': 1, 'rsa': 1, 'ria': 7, 'pse': 4, 'asa': 4, 'rpm': 2, 'rie': 7, 'rpa': 3, 'pim': 1, 'anm': 3, 'ase': 2, 'anp': 4, 'rpe': 4, 'fpm': 2, 'ane': 1, 'fpa': 2, 'pma': 2, 'ama': 1, 'rne': 1})
Thucydides
http://perseids.org/annotsrc/urn:cts:greekLit:tlg0003.tlg001.perseus-grc1
c:\Users\bella\OneDrive\Documents\GitHub\gorman-trees\public\xml\thuc-1-1-20-bu5.xml
defaultdict(<class 'int'>, {'aia': 50, 'apm': 16, 'ppe': 41, 'apa': 34, 'fnm': 2, 'rpe': 12, 'ppa': 76, 'iia': 62, 'aim': 28, 'ana': 15, 'pia': 35, 'anm': 5, 'pie': 23, 'pna': 20, 'asm': 1, 'ppm': 4, 'i

Xenophon
urn:cts:greekLit:tlg0032.tlg007.perseus-grc1
c:\Users\bella\OneDrive\Documents\GitHub\gorman-trees\public\xml\xen-cyr-1-6-bu1.xml
defaultdict(<class 'int'>, {'apa': 22, 'apm': 6, 'iie': 15, 'iia': 125, 'aim': 9, 'pie': 46, 'anm': 12, 'app': 6, 'ppe': 45, 'ppa': 124, 'pia': 125, 'poa': 39, 'poe': 22, 'pna': 188, 'aom': 11, 'psa': 30, 'rie': 7, 'roe': 1, 'pma': 8, 'pne': 84, 'fpm': 3, 'fnm': 4, 'pse': 12, 'rna': 8, 'ria': 11, 'rpa': 8, 'ane': 3, 'fia': 9, 'fim': 13, 'asa': 13, 'ana': 12, 'fpe': 1, 'pim': 1, 'ppm': 1, 'tim': 1, 'fna': 10, 'asm': 7, 'ase': 1, 'rme': 1, 'pme': 2, 'rma': 4, 'rpe': 15, 'fpa': 3, 'aia': 23, 'aoa': 13, 'aip': 3, 'aie': 1, 'amp': 1, 'fie': 1, 'roa': 2, 'anp': 2, 'aop': 3, 'pp-': 1, 'asp': 1, 'psm': 1, 'lie': 3, 'lia': 2, 'pom': 1, 'ama': 1, 'ape': 1})
N/A
0032-007
c:\Users\bella\OneDrive\Documents\GitHub\gorman-trees\public\xml\xen-cyr-7-1-3-tree.xml
defaultdict(<class 'int'>, {'apm': 27, 'iia': 164, 'aia': 114, 'ana': 24, 'ppa': 114, 'rpa': 13, 'ppe': 

Aesop
defaultdict(<class 'int'>, {'iia': 181, 'ppa': 214, 'iie': 42, 'apa': 176, 'ria': 15, 'pia': 166, 'aia': 148, 'app': 30, 'asa': 37, 'ppe': 85, 'anm': 14, 'aip': 12, 'ana': 53, 'fia': 29, 'anp': 5, 'pma': 9, 'pie': 50, 'pna': 94, 'rpa': 19, 'pme': 9, 'asp': 4, 'fim': 15, 'aim': 26, 'rpe': 11, 'apm': 41, 'pne': 35, 'poe': 3, 'poa': 8, 'aop': 6, 'psa': 13, 'rie': 7, 'aoa': 5, 'aom': 3, 'rme': 2, 'lie': 3, 'ama': 4, 'lia': 4, 'fna': 6, 'asm': 6, 'fpa': 4, 'fip': 2, 'pse': 3, 'rna': 1, 'tie': 1, 'roa': 1})
Pseudo-Homer
defaultdict(<class 'int'>, {'ppe': 24, 'ana': 10, 'pie': 10, 'aia': 106, 'anm': 4, 'apa': 50, 'iia': 54, 'aim': 15, 'pia': 30, 'ama': 4, 'fia': 7, 'aoa': 3, 'app': 2, 'pna': 9, 'pma': 4, 'iie': 17, 'pse': 2, 'ppa': 29, 'rpe': 2, 'asa': 10, 'aip': 7, 'lie': 1, 'ria': 3, 'rie': 1, 'pme': 2, 'pne': 4, 'rpa': 2, 'apm': 3, 'lia': 2, 'fpa': 1, 'fim': 2, 'asm': 1, 'asp': 1, 'fna': 2, 'psa': 1})
Chariton
defaultdict(<class 'int'>, {'apm': 27, 'fim': 14, 'apa': 84, 'iia': 125, '

Lucianus
defaultdict(<class 'int'>, {'pne': 11, 'fia': 6, 'psa': 5, 'pia': 60, 'aop': 2, 'poa': 7, 'ppe': 15, 'rpa': 6, 'rna': 5, 'aom': 6, 'pma': 17, 'fpp': 4, 'aia': 21, 'ppa': 29, 'anp': 2, 'apa': 16, 'pna': 34, 'ana': 11, 'app': 4, 'ama': 4, 'rme': 1, 'fim': 15, 'fpa': 4, 'ria': 19, 'rpe': 7, 'aim': 9, 'anm': 7, 'rie': 7, 'rne': 10, 'iie': 7, 'aoa': 7, 'amm': 1, 'asa': 7, 'fpm': 1, 'pie': 8, 'rma': 1, 'apm': 5, 'fna': 4, 'iia': 26, 'fnm': 3, 'aip': 1, 'fip': 2, 'poe': 1, 'pme': 1})
Lucianus
defaultdict(<class 'int'>, {'pia': 70, 'rne': 7, 'ana': 43, 'iie': 47, 'anp': 5, 'iia': 130, 'pie': 24, 'aim': 24, 'ppa': 136, 'aia': 84, 'apa': 73, 'apm': 17, 'poe': 3, 'aip': 15, 'pna': 41, 'aom': 7, 'rna': 8, 'rpe': 16, 'ppe': 42, 'poa': 8, 'ria': 10, 'fim': 16, 'app': 18, 'anm': 7, 'pma': 9, 'psa': 7, 'pne': 23, 'aoa': 12, 'fpm': 3, 'fia': 13, 'rie': 6, 'ama': 6, 'lie': 10, 'asa': 7, 'lia': 14, 'rpa': 14, 'fpa': 3, 'pme': 2, 'aop': 6, 'fna': 3, 'amm': 4, 'fnm': 1, 'rme': 2, 'tie': 1, 'rma': 

Thucydides
urn:cts:greekLit:tlg0003.tlg001.perseus-grc1
c:\Users\bella\OneDrive\Documents\GitHub\gAGDT\data\xml\tlg0003.tlg001.perseus-grc1.1.tb.xml
defaultdict(<class 'int'>, {'aia': 449, 'apm': 109, 'ppe': 251, 'apa': 360, 'fnm': 19, 'rpe': 53, 'ppa': 541, 'iia': 431, 'aim': 160, 'ana': 174, 'pia': 408, 'anm': 53, 'pie': 169, 'pna': 373, 'asm': 12, 'iie': 151, 'anp': 21, 'ape': 7, 'pne': 151, 'ria': 31, 'rne': 15, 'app': 61, 'rpa': 63, 'rna': 9, 'aip': 60, 'rie': 26, 'aoa': 27, 'poa': 32, 'aop': 6, 'poe': 11, 'fna': 21, 'lie': 26, 'lia': 12, 'fpm': 15, 'fia': 45, 'psa': 55, 'asa': 52, 'fim': 67, 'ane': 7, 'pse': 23, 'fpa': 13, 'asp': 11, 'fnp': 3, 'aom': 10, 'ame': 3, 'ama': 8, 'pma': 14, 'rsa': 1, 'pme': 7, 'fip': 7, 'amm': 2, 'rme': 2, 'ase': 3, 'foa': 1, 'aoe': 1, 'rma': 1, 'aie': 2, 'fpe': 1})
Sophocles
urn:cts:greekLit:tlg0011.tlg001.perseus-grc2
c:\Users\bella\OneDrive\Documents\GitHub\gAGDT\data\xml\tlg0011.tlg001.perseus-grc2.tb.xml
defaultdict(<class 'int'>, {'pia': 318, 'ap

Hesiod
urn:cts:greekLit:tlg0020.tlg002.perseus-grc1
c:\Users\bella\OneDrive\Documents\GitHub\gAGDT\data\xml\tlg0020.tlg002.perseus-grc1.tb.xml
defaultdict(<class 'int'>, {'ppa': 109, 'pma': 14, 'pia': 160, 'ama': 2, 'apa': 58, 'aoe': 2, 'iia': 30, 'aoa': 16, 'aim': 14, 'aia': 73, 'pna': 99, 'ane': 6, 'amm': 3, 'poa': 22, 'pie': 52, 'apm': 25, 'fim': 17, 'pse': 14, 'ana': 33, 'aom': 10, 'rpa': 11, 'fpm': 2, 'fia': 22, 'fpa': 1, 'iie': 10, 'anm': 32, 'asm': 8, 'ap-': 1, 'anp': 3, 'pme': 10, 'ria': 20, 'rpe': 29, 'ppe': 37, 'app': 5, 'tie': 1, 'psa': 48, 'asa': 32, 'asp': 3, 'rie': 7, 'ase': 7, 'pne': 37, 'tia': 1, 'poe': 3, 'rne': 1, 'aop': 2, 'fna': 1, 'fie': 1, 'rma': 1, 'rme': 1})
Hesiod
urn:cts:greekLit:tlg0020.tlg003.perseus-grc1
c:\Users\bella\OneDrive\Documents\GitHub\gAGDT\data\xml\tlg0020.tlg003.perseus-grc1.tb.xml
defaultdict(<class 'int'>, {'apa': 29, 'aia': 76, 'iie': 50, 'iia': 109, 'app': 11, 'apm': 8, 'ana': 19, 'aom': 1, 'aoa': 3, 'ppe': 38, 'ppa': 74, 'aim': 27, 'aip': 6