In [1]:
# Some spec work done 12/21/18 following this tweet:
# https://twitter.com/MagisterConway/status/1075937446129471488
# Hasn't been reviewed yet. Tweet corrections, etc. to @diyclassics

In [2]:
# Imports

import os
import string
import re
from collections import Counter

from cltk.corpus.latin import latinlibrary
from cltk.tokenize.sentence import TokenizeSentence
from cltk.tokenize.word import WordTokenizer
from cltk.lemmatize.latin.backoff import BackoffLatinLemmatizer
from cltk.stem.latin.j_v import JVReplacer
from cltk.utils.file_operations import open_pickle

In [3]:
# Set up training sentences

rel_path = os.path.join('~/cltk_data/latin/model/latin_models_cltk/lemmata/backoff')
path = os.path.expanduser(rel_path)

# Check for presence of latin_pos_lemmatized_sents
file = 'latin_pos_lemmatized_sents.pickle'      

latin_pos_lemmatized_sents_path = os.path.join(path, file)
if os.path.isfile(latin_pos_lemmatized_sents_path):
    latin_pos_lemmatized_sents = open_pickle(latin_pos_lemmatized_sents_path)
else:
    latin_pos_lemmatized_sents = []
    print('The file %s is not available in cltk_data' % file)  

In [4]:
# Setup CLTK tools

word_tokenizer = WordTokenizer('latin')
sent_tokenizer = TokenizeSentence('latin')
lemmatizer = BackoffLatinLemmatizer(latin_pos_lemmatized_sents)
replacer = JVReplacer()

In [5]:
met_files = [file for file in latinlibrary.fileids() if 'ovid.met' in file]
met_order = [int(" ".join(re.findall(r'\d+', item))) for item in met_files]
met_files = [x for _, x in sorted(zip(met_order, met_files))]
print(met_files)

['ovid/ovid.met1.txt', 'ovid/ovid.met2.txt', 'ovid/ovid.met3.txt', 'ovid/ovid.met4.txt', 'ovid/ovid.met5.txt', 'ovid/ovid.met6.txt', 'ovid/ovid.met7.txt', 'ovid/ovid.met8.txt', 'ovid/ovid.met9.txt', 'ovid/ovid.met10.txt', 'ovid/ovid.met11.txt', 'ovid/ovid.met12.txt', 'ovid/ovid.met13.txt', 'ovid/ovid.met14.txt', 'ovid/ovid.met15.txt']


In [6]:
# Get raw text of Metamorphoses

met_raw = latinlibrary.raw(met_files)

In [7]:
# Preprocessing script for the Latin Library

def preprocess(text):    
    
    remove_list = [
        r'Ovid: Metamorph*oses .+',
        r'P. OVIDI NASONIS METAMORPHOSEN LIBER .+',
        r'\bOvid\b',
        r'The Latin Library',
        r'The Classics Page',
    ]
    
    for pattern in remove_list:
        text = re.sub(pattern, '', text)
    
    text = text.lower()
    
    text= re.sub(r'&lt;','<',text)
    text= re.sub(r'&gt;','>',text)    

    punctuation = string.punctuation
    #punctuation += "\"#$%&\'()*+,-/:;<=>@[\]^_`{|}~"
    translator = str.maketrans({key: " " for key in punctuation})
    text = text.translate(translator)
    
    translator = str.maketrans({key: " " for key in '0123456789'})
    text = text.translate(translator)
    
    text = replacer.replace(text)
    
    text = re.sub('[ ]+',' ', text) # Remove double spaces
    text = re.sub('\s+\n+\s+','\n', text) # Remove double lines and trim spaces around new lines
    
    return text.strip()

In [8]:
met_raw[:100]

'Ovid: Metamorposes I\r\n\t\t \r\n\r\n\t\t \r\n\t\t \r\n\t \r\n\t\r\n \r\n\r\n P. OVIDI NASONIS METAMORPHOSEN LIBER PRIMVS\r\n \r\n'

In [9]:
# Preprocess Latin Library text

met_pp = preprocess(met_raw)
print(met_pp[:100])

in noua fert animus mutatas dicere formas 
corpora di coeptis nam uos mutastis et illas 
adspirate


In [10]:
# Tokenize Latin Library text

met_tokens = word_tokenizer.tokenize(met_pp)
print(met_tokens[:50])

['in', 'noua', 'fert', 'animus', 'mutatas', 'dicere', 'formas', 'corpora', 'di', 'coeptis', 'nam', 'uos', 'mutastis', 'et', 'illas', 'adspirate', 'meis', 'prima', '-que', 'ab', 'origine', 'mundi', 'ad', 'mea', 'perpetuum', 'deducite', 'tempora', 'carmen', 'ante', 'mare', 'et', 'terras', 'et', 'quod', 'tegit', 'omnia', 'caelum', 'unus', 'erat', 'toto', 'naturae', 'uultus', 'in', 'orbe', 'quem', 'dixere', 'chaos', 'rudis', 'indigesta', '-que']


In [11]:
# Get total token counts

met_tokens_len = len(met_tokens)
met_tokens_set_len = len(set(met_tokens))

In [12]:
# Print top 10 token counts

print('Number of tokens in Metamorphoses:', met_tokens_len)
print('Number of unique tokens in Metamorphoses:', met_tokens_set_len)

Number of tokens in Metamorphoses: 82834
Number of unique tokens in Metamorphoses: 18382


In [13]:
# Build counter of top token counts

met_tokens_counter = Counter(met_tokens)
met_tokens_mc = met_tokens_counter.most_common(10000)

running = 0

print('Top 25 tokens in Metamorphoses:\n')
print("{number:>5}  {token:<12}{count:<12}{percent:<12}{running:<12}".format(number="", token="TOKEN", count="COUNT", percent="Type-Tok %", running = "RUNNING %"))
for i, pair in enumerate(met_tokens_mc[:10]):
    running += pair[1]
    print("{number:>5}. {token:<12}{count:<12}{percent:<12}{running:<12}".format(number=i+1, token=pair[0], count=pair[1], percent=str(round(pair[1] / len(met_tokens)*100, 2))+"%", running = str(round(running / len(met_tokens)*100, 2))+"%"))

Top 25 tokens in Metamorphoses:

       TOKEN       COUNT       Type-Tok %  RUNNING %   
    1. -que        4383        5.29%       5.29%       
    2. et          2131        2.57%       7.86%       
    3. in          1164        1.41%       9.27%       
    4. est         987         1.19%       10.46%      
    5. nec         629         0.76%       11.22%      
    6. non         588         0.71%       11.93%      
    7. cum         462         0.56%       12.49%      
    8. ut          379         0.46%       12.95%      
    9. per         331         0.4%        13.34%      
   10. -ne         319         0.39%       13.73%      


In [14]:
with open("data/met_counts/met_tokens.txt", 'w') as f:
    for k,v in  met_tokens_counter.most_common():
        f.write( "{} {}\n".format(k,v))

In [15]:
# Lemmatize Latin Library text

met_lemma_pairs = lemmatizer.lemmatize(met_tokens)
print(met_lemma_pairs)

[('in', 'in'), ('noua', 'nouus'), ('fert', 'fero'), ('animus', 'animus'), ('mutatas', 'muto'), ('dicere', 'dico'), ('formas', 'forma'), ('corpora', 'corpus'), ('di', 'deus'), ('coeptis', 'coepio'), ('nam', 'nam'), ('uos', 'tu'), ('mutastis', 'muto'), ('et', 'et'), ('illas', 'ille'), ('adspirate', 'adspiro'), ('meis', 'meus'), ('prima', 'primus'), ('-que', '-que'), ('ab', 'ab'), ('origine', 'origo'), ('mundi', 'mundus'), ('ad', 'ad'), ('mea', 'meus'), ('perpetuum', 'perpetuus'), ('deducite', 'deduco'), ('tempora', 'tempus'), ('carmen', 'carmen'), ('ante', 'ante'), ('mare', 'mare'), ('et', 'et'), ('terras', 'terra'), ('et', 'et'), ('quod', 'qui'), ('tegit', 'tego'), ('omnia', 'omnis'), ('caelum', 'caelum'), ('unus', 'unus'), ('erat', 'sum'), ('toto', 'totus'), ('naturae', 'natura'), ('uultus', 'uultus'), ('in', 'in'), ('orbe', 'orbis'), ('quem', 'qui'), ('dixere', 'dico'), ('chaos', 'chaos'), ('rudis', 'rudis'), ('indigesta', 'indigestus'), ('-que', '-que'), ('moles', 'moles'), ('nec', '

In [16]:
# Get total lemma counts

met_lemmas = [lemma[1] for lemma in met_lemma_pairs]
met_lemmas_set_len = len(set(met_lemmas))

In [17]:
# Print top 10 token counts

print('Number of tokens in Metamorphoses:', met_tokens_len)
print('Number of unique tokens in Metamorphoses:', met_tokens_set_len)
print('Number of unique lemmas in Metamorphoses:', met_lemmas_set_len)

Number of tokens in Metamorphoses: 82834
Number of unique tokens in Metamorphoses: 18382
Number of unique lemmas in Metamorphoses: 7772


In [18]:
# Build counter of top lemma counts

met_lemmas_counter = Counter(met_lemmas)
met_lemmas_mc = met_lemmas_counter.most_common(10000)

#print('Top 10 lemmas in Metamorphoses:\n')
#for i, pair in enumerate(met_lemmas_mc[:10]):
#    print("{number}. {lemma}\t\t{count}\t\t{percent}%".format(number=i+1, lemma=pair[0], count=pair[1], percent=round(pair[1] / len(met_tokens)*100, 2)))

running = 0

print('Top 10 lemmas in Metamorphoses:\n')
print("{number:>5}  {lemma:<12}{count:<12}{percent:<12}{running:<12}".format(number="", lemma="LEMMA", count="COUNT", percent="TYPE-LEM %", running = "RUNNING %"))
for i, pair in enumerate(met_lemmas_mc[:10]):
    running += pair[1]
    print("{number:>5}. {lemma:<12}{count:<12}{percent:<12}{running:<12}".format(number=i+1, lemma=pair[0], count=pair[1], percent=str(round(pair[1] / len(met_tokens)*100, 2))+"%", running = str(round(running / len(met_tokens)*100, 2))+"%"))    

Top 10 lemmas in Metamorphoses:

       LEMMA       COUNT       TYPE-LEM %  RUNNING %   
    1. -que        4385        5.29%       5.29%       
    2. sum         2166        2.61%       7.91%       
    3. et          2131        2.57%       10.48%      
    4. qui         1276        1.54%       12.02%      
    5. in          1164        1.41%       13.43%      
    6. ille        784         0.95%       14.37%      
    7. hic         773         0.93%       15.31%      
    8. neque       729         0.88%       16.19%      
    9. ego         608         0.73%       16.92%      
   10. non         588         0.71%       17.63%      


In [19]:
# Print top 10,000 counts

print('Top 10,000 tokens in the Metamorphoses:\n')
for i, pair in enumerate(met_tokens_mc):
    print("{number}. {token} ({count})".format(number=i+1, token=pair[0], count=pair[1]))

Top 10,000 tokens in the Metamorphoses:

1. -que (4383)
2. et (2131)
3. in (1164)
4. est (987)
5. nec (629)
6. non (588)
7. cum (462)
8. ut (379)
9. per (331)
10. -ne (319)
11. quae (297)
12. sed (292)
13. tamen (290)
14. mihi (275)
15. ad (274)
16. quoque (274)
17. quod (270)
18. si (251)
19. erat (244)
20. me (222)
21. iam (221)
22. illa (221)
23. ille (212)
24. quam (210)
25. qui (206)
26. ab (190)
27. quid (185)
28. de (183)
29. tibi (177)
30. dum (165)
31. se (163)
32. nunc (161)
33. dixit (160)
34. te (159)
35. modo (152)
36. at (151)
37. sua (148)
38. esse (145)
39. sic (143)
40. haec (143)
41. hoc (143)
42. fuit (140)
43. sub (139)
44. aut (138)
45. a (130)
46. quo (125)
47. ait (124)
48. ora (123)
49. ubi (118)
50. tum (117)
51. ipse (117)
52. quem (116)
53. sine (115)
54. qua (110)
55. sit (110)
56. enim (107)
57. ego (106)
58. ante (104)
59. mea (102)
60. hic (102)
61. tu (101)
62. sanguine (100)
63. neque (100)
64. nam (99)
65. nisi (98)
66. ne (98)
67. atque (96)
68. pro (

1342. lege (9)
1343. metus (9)
1344. breue (9)
1345. fuerunt (9)
1346. tertia (9)
1347. scelerata (9)
1348. fluctibus (9)
1349. mariti (9)
1350. patrios (9)
1351. arduus (9)
1352. mole (9)
1353. dira (9)
1354. iras (9)
1355. sublimis (9)
1356. ferus (9)
1357. fore (9)
1358. tantae (9)
1359. temporis (9)
1360. grauem (9)
1361. perdere (9)
1362. domino (9)
1363. ueteris (9)
1364. occidit (9)
1365. antris (9)
1366. tectus (9)
1367. barba (9)
1368. fluit (9)
1369. pressit (9)
1370. longi (9)
1371. percussit (9)
1372. uias (9)
1373. sacris (9)
1374. exire (9)
1375. nostrae (9)
1376. mentem (9)
1377. tetigere (9)
1378. gradus (9)
1379. uterque (9)
1380. pronus (9)
1381. uetustas (9)
1382. ducere (9)
1383. laborum (9)
1384. solo (9)
1385. aluo (9)
1386. quippe (9)
1387. apta (9)
1388. telis (9)
1389. certamine (9)
1390. iuuenum (9)
1391. ignara (9)
1392. fortibus (9)
1393. abiit (9)
1394. leui (9)
1395. hostes (9)
1396. doloris (9)
1397. armenta (9)
1398. fuga (9)
1399. passu (9)
1400. haeret

1841. fero (7)
1842. humano (7)
1843. fratris (7)
1844. atlas (7)
1845. maximus (7)
1846. excidit (7)
1847. dolet (7)
1848. guttae (7)
1849. propior (7)
1850. uirides (7)
1851. odit (7)
1852. patres (7)
1853. it (7)
1854. pharetram (7)
1855. narrare (7)
1856. parantem (7)
1857. lateri (7)
1858. crescere (7)
1859. ausa (7)
1860. uento (7)
1861. reuerentia (7)
1862. regina (7)
1863. iunone (7)
1864. thalamo (7)
1865. niueis (7)
1866. prolem (7)
1867. tribus (7)
1868. gemino (7)
1869. ultro (7)
1870. petiit (7)
1871. omen (7)
1872. frigus (7)
1873. exercet (7)
1874. corde (7)
1875. claro (7)
1876. gemini (7)
1877. reddere (7)
1878. fila (7)
1879. impetus (7)
1880. cognata (7)
1881. sonus (7)
1882. digiti (7)
1883. tuam (7)
1884. boues (7)
1885. lapis (7)
1886. merito (7)
1887. secus (7)
1888. postes (7)
1889. atris (7)
1890. miserrima (7)
1891. pelle (7)
1892. moram (7)
1893. toris (7)
1894. ueniat (7)
1895. differt (7)
1896. uenenis (7)
1897. orbes (7)
1898. tristia (7)
1899. uestrae (7)

2841. belua (5)
2842. conclamat (5)
2843. accipiunt (5)
2844. orant (5)
2845. nauis (5)
2846. aberat (5)
2847. dextro (5)
2848. scopulum (5)
2849. resoluta (5)
2850. sternit (5)
2851. tactu (5)
2852. remansit (5)
2853. epulis (5)
2854. fortissime (5)
2855. multorum (5)
2856. mutauit (5)
2857. freto (5)
2858. quaeris (5)
2859. ueniebat (5)
2860. insuper (5)
2861. inimica (5)
2862. profuit (5)
2863. cuspis (5)
2864. cultu (5)
2865. committere (5)
2866. concurrere (5)
2867. prohibent (5)
2868. casu (5)
2869. castra (5)
2870. lacerto (5)
2871. fraxinus (5)
2872. superat (5)
2873. fame (5)
2874. potius (5)
2875. fregit (5)
2876. turbae (5)
2877. clipeo (5)
2878. mouentem (5)
2879. uincis (5)
2880. uolui (5)
2881. pallada (5)
2882. dux (5)
2883. certaminis (5)
2884. silentum (5)
2885. agitur (5)
2886. excutit (5)
2887. ualido (5)
2888. pudori (5)
2889. repetit (5)
2890. parili (5)
2891. messes (5)
2892. certum (5)
2893. soluerat (5)
2894. instat (5)
2895. sustinui (5)
2896. manat (5)
2897. a

3756. opacas (4)
3757. petentem (4)
3758. sacrum (4)
3759. mentita (4)
3760. procri (4)
3761. concursibus (4)
3762. debueram (4)
3763. locutus (4)
3764. poscunt (4)
3765. datur (4)
3766. timoris (4)
3767. taurorum (4)
3768. auido (4)
3769. praeda (4)
3770. patiar (4)
3771. sospes (4)
3772. maturus (4)
3773. uiderunt (4)
3774. peti (4)
3775. horum (4)
3776. meritorum (4)
3777. relictus (4)
3778. solida (4)
3779. aurae (4)
3780. cultros (4)
3781. carchesia (4)
3782. geminis (4)
3783. mortali (4)
3784. moratur (4)
3785. obstipuere (4)
3786. cubito (4)
3787. terruit (4)
3788. tirynthius (4)
3789. capulo (4)
3790. sospite (4)
3791. phocus (4)
3792. aeacidae (4)
3793. aetate (4)
3794. status (4)
3795. cinis (4)
3796. requiris (4)
3797. plenum (4)
3798. debuit (4)
3799. fundit (4)
3800. funeribus (4)
3801. redde (4)
3802. ciues (4)
3803. uidebar (4)
3804. pares (4)
3805. faciet (4)
3806. procris (4)
3807. inuitum (4)
3808. exemplum (4)
3809. coegi (4)
3810. feram (4)
3811. collis (4)
3812. pr

4840. turris (3)
4841. musa (3)
4842. uictae (3)
4843. tali (3)
4844. certare (3)
4845. falso (3)
4846. praetemptat (3)
4847. glaebam (3)
4848. dimouit (3)
4849. regit (3)
4850. incerta (3)
4851. labentibus (3)
4852. uelo (3)
4853. aequales (3)
4854. raptor (3)
4855. sulphure (3)
4856. rupta (3)
4857. bimari (3)
4858. gens (3)
4859. cyane (3)
4860. exstitit (3)
4861. conponere (3)
4862. dilexit (3)
4863. pronos (3)
4864. paruas (3)
4865. dulce (3)
4866. auidam (3)
4867. sicaniam (3)
4868. colonos (3)
4869. auidae (3)
4870. rapinae (3)
4871. elide (3)
4872. colo (3)
4873. interrita (3)
4874. reperire (3)
4875. uocas (3)
4876. feremus (3)
4877. excepit (3)
4878. cibos (3)
4879. cereri (3)
4880. indicio (3)
4881. plumas (3)
4882. insistere (3)
4883. faciles (3)
4884. maesta (3)
4885. habebam (3)
4886. iuuabat (3)
4887. lassa (3)
4888. cucurri (3)
4889. inquam (3)
4890. inclusa (3)
4891. latens (3)
4892. moui (3)
4893. cauernis (3)
4894. fertilis (3)
4895. barbarus (3)
4896. recipit (3)
48

5339. proteus (3)
5340. aequoreae (3)
5341. haerebat (3)
5342. fibris (3)
5343. peleu (3)
5344. putetis (3)
5345. rabie (3)
5346. rubet (3)
5347. pelea (3)
5348. carinis (3)
5349. legi (3)
5350. feroces (3)
5351. ardescunt (3)
5352. murum (3)
5353. subeunt (3)
5354. ceyca (3)
5355. frangitur (3)
5356. inanes (3)
5357. uarias (3)
5358. somnum (3)
5359. naufragus (3)
5360. subuolat (3)
5361. augur (3)
5362. cadet (3)
5363. cauas (3)
5364. credulitas (3)
5365. senserunt (3)
5366. ferri (3)
5367. fatus (3)
5368. passus (3)
5369. penetrauit (3)
5370. achillem (3)
5371. bimembres (3)
5372. auidum (3)
5373. ursae (3)
5374. pirithoi (3)
5375. gratissime (3)
5376. macareus (3)
5377. aggere (3)
5378. credita (3)
5379. phrygum (3)
5380. urnam (3)
5381. tydides (3)
5382. uulgi (3)
5383. classem (3)
5384. feroci (3)
5385. aiaci (3)
5386. prosit (3)
5387. metuendus (3)
5388. fidum (3)
5389. diomede (3)
5390. clam (3)
5391. achilli (3)
5392. magnus (3)
5393. fecimus (3)
5394. aiacis (3)
5395. troiam 

6622. iunci (2)
6623. liquor (2)
6624. stagni (2)
6625. sume (2)
6626. cytoriaco (2)
6627. optauit (2)
6628. properabat (2)
6629. circumspexit (2)
6630. genuere (2)
6631. beati (2)
6632. fortunata (2)
6633. pendentibus (2)
6634. rubenti (2)
6635. resonant (2)
6636. poscenti (2)
6637. sororia (2)
6638. ferenti (2)
6639. tecum (2)
6640. trado (2)
6641. uacuis (2)
6642. temperie (2)
6643. tenero (2)
6644. uitro (2)
6645. inplicat (2)
6646. intexere (2)
6647. truncos (2)
6648. denegat (2)
6649. inhaerebat (2)
6650. inprobe (2)
6651. iunguntur (2)
6652. tenaci (2)
6653. duplex (2)
6654. uirili (2)
6655. obstrepuere (2)
6656. frondescere (2)
6657. uites (2)
6658. stamine (2)
6659. dubiae (2)
6660. repente (2)
6661. pingues (2)
6662. ardere (2)
6663. aedes (2)
6664. saeuarum (2)
6665. fumida (2)
6666. diuersae (2)
6667. membrana (2)
6668. includit (2)
6669. perdiderint (2)
6670. sustinuere (2)
6671. operire (2)
6672. ditis (2)
6673. capax (2)
6674. apertas (2)
6675. forum (2)
6676. antiquae (

7133. spina (2)
7134. uiret (2)
7135. limoso (2)
7136. saliunt (2)
7137. lycia (2)
7138. summos (2)
7139. ruricolae (2)
7140. pelops (2)
7141. ostendisse (2)
7142. finitimi (2)
7143. propinquae (2)
7144. orauere (2)
7145. nobilis (2)
7146. humiles (2)
7147. subuecta (2)
7148. threicius (2)
7149. pandio (2)
7150. procnes (2)
7151. pronuba (2)
7152. funere (2)
7153. raptas (2)
7154. profanus (2)
7155. tyranno (2)
7156. itys (2)
7157. iussere (2)
7158. utilitas (2)
7159. dabis (2)
7160. remige (2)
7161. fausto (2)
7162. sermo (2)
7163. spondere (2)
7164. naidas (2)
7165. dryadas (2)
7166. paratus (2)
7167. cremet (2)
7168. libido (2)
7169. ingentibus (2)
7170. sollicitare (2)
7171. capiunt (2)
7172. uisura (2)
7173. ambarum (2)
7174. ponitur (2)
7175. odrysius (2)
7176. uoluisti (2)
7177. lenime (2)
7178. mandabat (2)
7179. admotum (2)
7180. praedator (2)
7181. nido (2)
7182. fessis (2)
7183. clamato (2)
7184. sorore (2)
7185. haeserat (2)
7186. passos (2)
7187. uirginitas (2)
7188. claus

8338. concretam (2)
8339. rebar (2)
8340. annosa (2)
8341. contingent (2)
8342. orbata (2)
8343. foedata (2)
8344. soluto (2)
8345. sacrificos (2)
8346. adspicias (2)
8347. praestem (2)
8348. uolumina (2)
8349. densetur (2)
8350. cineri (2)
8351. cythereius (2)
8352. anius (2)
8353. falleris (2)
8354. therses (2)
8355. troiani (2)
8356. priamides (2)
8357. helenus (2)
8358. oppositum (2)
8359. zephyris (2)
8360. teneras (2)
8361. acidis (2)
8362. edam (2)
8363. telemus (2)
8364. polyphemo (2)
8365. latitans (2)
8366. hausi (2)
8367. durior (2)
8368. feta (2)
8369. sentitur (2)
8370. seruiet (2)
8371. ouilibus (2)
8372. coagula (2)
8373. despice (2)
8374. regnare (2)
8375. obumbrat (2)
8376. densissima (2)
8377. hirtae (2)
8378. aetnam (2)
8379. saltibus (2)
8380. timentes (2)
8381. quantam (2)
8382. innitens (2)
8383. tangeris (2)
8384. faucibus (2)
8385. liquerat (2)
8386. zancle (2)
8387. salute (2)
8388. aenean (2)
8389. hippotadae (2)
8390. sibyllae (2)
8391. cumaea (2)
8392. pulue

8837. instituit (1)
8838. ludos (1)
8839. pythia (1)
8840. domitae (1)
8841. aesculeae (1)
8842. capiebat (1)
8843. decentia (1)
8844. cingebat (1)
8845. qualibet (1)
8846. daphne (1)
8847. lasciue (1)
8848. pestifero (1)
8849. iugera (1)
8850. strauimus (1)
8851. pythona (1)
8852. inritare (1)
8853. laudes (1)
8854. figat (1)
8855. eliso (1)
8856. sagittifera (1)
8857. prompsit (1)
8858. diuersorum (1)
8859. auratum (1)
8860. obtusum (1)
8861. peneide (1)
8862. apollineas (1)
8863. traiecta (1)
8864. exuuiis (1)
8865. innuptae (1)
8866. coercebat (1)
8867. nemora (1)
8868. hymen (1)
8869. exosa (1)
8870. suffuderat (1)
8871. obsequitur (1)
8872. daphnes (1)
8873. stipulae (1)
8874. demptis (1)
8875. adolentur (1)
8876. uiator (1)
8877. sterilem (1)
8878. sperando (1)
8879. comantur (1)
8880. reuocantis (1)
8881. resistit (1)
8882. penei (1)
8883. insequor (1)
8884. aquilam (1)
8885. sequendi (1)
8886. cadas (1)
8887. indigna (1)
8888. notent (1)
8889. sentes (1)
8890. curre (1)
8891. 

9837. debet (1)
9838. satiatae (1)
9839. erili (1)
9840. infectus (1)
9841. contraxerat (1)
9842. lustra (1)
9843. uagantes (1)
9844. participes (1)
9845. hyantius (1)
9846. croceis (1)
9847. reducet (1)
9848. repetemus (1)
9849. sistite (1)
9850. intermittunt (1)
9851. cupressu (1)
9852. gargaphie (1)
9853. succinctae (1)
9854. laboratum (1)
9855. simulauerat (1)
9856. tofis (1)
9857. natiuum (1)
9858. perlucidus (1)
9859. gramineo (1)
9860. perfundere (1)
9861. retentos (1)
9862. depositae (1)
9863. demunt (1)
9864. ismenis (1)
9865. crocale (1)
9866. nephele (1)
9867. hyale (1)
9868. rhanis (1)
9869. psecas (1)
9870. phiale (1)
9871. perluitur (1)
9872. errans (1)
9873. percussere (1)
9874. circumfusae (1)
9875. texere (1)
9876. supereminet (1)
9877. infectis (1)
9878. stipata (1)
9879. promptas (1)
9880. spargens (1)
9881. ultricibus (1)
9882. futurae (1)
9883. narres (1)
9884. sparso (1)
9885. cacuminat (1)
9886. maculoso (1)
9887. additus (1)
9888. autonoeius (1)
9889. fluxerunt 

In [20]:
# Print top 10,000 lemma counts

print('Top 10,000 lemmas in the Metamorphoses:\n')
for i, pair in enumerate(met_lemmas_mc):
    print("{number}. {lemma} ({count})".format(number=i+1, lemma=pair[0], count=pair[1]))

Top 10,000 lemmas in the Metamorphoses:

1. -que (4385)
2. sum (2166)
3. et (2131)
4. qui (1276)
5. in (1164)
6. ille (784)
7. hic (773)
8. neque (729)
9. ego (608)
10. non (588)
11. tu (507)
12. suus (472)
13. cum2 (462)
14. uideo (427)
15. do (403)
16. ut (379)
17. dico (364)
18. per (331)
19. ab (320)
20. -ne (319)
21. possum (311)
22. fero (301)
23. facio (295)
24. sed (292)
25. tamen (290)
26. deus (280)
27. ipse (280)
28. os (275)
29. ad (274)
30. quoque (274)
31. corpus (261)
32. meus (253)
33. si (251)
34. omnis (245)
35. quis (242)
36. sui (233)
37. iam (221)
38. quam (210)
39. pars (207)
40. habeo (206)
41. terra (200)
42. de (192)
43. unda (188)
44. manus (187)
45. peto (184)
46. magnus (183)
47. teneo (171)
48. unus (168)
49. dum (165)
50. pectus (164)
51. longus (162)
52. nunc (161)
53. noster (159)
54. tuus (157)
55. at (157)
56. tantus (153)
57. modo (152)
58. multus (146)
59. nomen (146)
60. sic (143)
61. iuppiter (143)
62. nullus (142)
63. primus (141)
64. ignis (139)


829. laeuus (17)
830. quater (17)
831. centum (17)
832. facinus (17)
833. adicio (17)
834. nubila (17)
835. agito (17)
836. leo (17)
837. prosum (17)
838. rector (17)
839. paternus (17)
840. saeuus (17)
841. laus (17)
842. propero (17)
843. retro (17)
844. muto1 (17)
845. lignum (17)
846. error (17)
847. niteo (17)
848. forsitan (17)
849. arceo (17)
850. como1 (17)
851. auis (17)
852. sileo (17)
853. utilis (17)
854. ater (17)
855. scopulus (17)
856. nutrix (17)
857. damno (17)
858. memini (17)
859. tempto (17)
860. gaudium (17)
861. siquis (17)
862. achilles (17)
863. pulso (16)
864. orbo (16)
865. sublimis (16)
866. erigo (16)
867. communis (16)
868. avidus (16)
869. aduncus (16)
870. maximus (16)
871. exsto (16)
872. iustus (16)
873. femineus (16)
874. moror (16)
875. fax (16)
876. furtum (16)
877. diuus (16)
878. exclamo (16)
879. naris (16)
880. corona (16)
881. torqueo (16)
882. tenebrae (16)
883. en (16)
884. imber (16)
885. deficio (16)
886. paro1 (16)
887. quotiens (16)
888. a

1828. candeo (6)
1829. horridus (6)
1830. dolus (6)
1831. sanguineus (6)
1832. monumentum (6)
1833. convivium (6)
1834. rusticus (6)
1835. discrimen (6)
1836. iugulum (6)
1837. rabies (6)
1838. ocior (6)
1839. repono (6)
1840. aquilo (6)
1841. intremo (6)
1842. patefacio (6)
1843. nereis (6)
1844. ouis (6)
1845. adhaereo (6)
1846. themis (6)
1847. repleo (6)
1848. formo (6)
1849. delubrum (6)
1850. iussum (6)
1851. rigor (6)
1852. truncus (6)
1853. letal (6)
1854. delius (6)
1855. umbrosus (6)
1856. auro (6)
1857. impatiens (6)
1858. pulcher (6)
1859. asper (6)
1860. obvius (6)
1861. absumo (6)
1862. complector (6)
1863. triumphus (6)
1864. pompa (6)
1865. augustus (6)
1866. resideo (6)
1867. quodsi (6)
1868. servo (6)
1869. conspicio (6)
1870. admiror (6)
1871. decerpo (6)
1872. levis1 (6)
1873. mutuus (6)
1874. abstraho (6)
1875. fistula (6)
1876. calamus (6)
1877. cyllenius (6)
1878. clymene (6)
1879. aequal (6)
1880. calceo (6)
1881. publicus (6)
1882. detraho (6)
1883. paeniteo (6

3328. cerebrum (3)
3329. bimembres (3)
3330. vertex (3)
3331. pirithoi (3)
3332. redimo (3)
3333. macareus (3)
3334. aggero2 (3)
3335. phrygum (3)
3336. tydides (3)
3337. diomede (3)
3338. clam (3)
3339. troiam (3)
3340. reposco1 (3)
3341. penso (3)
3342. telamonius (3)
3343. troas (3)
3344. requie (3)
3345. troum (3)
3346. atridae (3)
3347. hostia (3)
3348. catulus (3)
3349. pelasgos (3)
3350. memnonis (3)
3351. sacerdos1 (3)
3352. ausoniae (3)
3353. sibylla (3)
3354. achaemenis (3)
3355. turnus (3)
3356. uenulus (3)
3357. iulius (3)
3358. latinus (3)
3359. incurvo (3)
3360. romana (3)
3361. hersilie (3)
3362. numam (3)
3363. genero (3)
3364. senatus (3)
3365. abscido (2)
3366. eximo (2)
3367. concordi (2)
3368. possedo (2)
3369. congeriem (2)
3370. aequalis (2)
3371. liber1 (2)
3372. tonitruum (2)
3373. tepesco (2)
3374. scythiam (2)
3375. opifex (2)
3376. effigies (2)
3377. de_-recingo (2)
3378. arbuteus (2)
3379. montanus (2)
3380. fraga (2)
3381. stillo (2)
3382. pretiosus (2)
338

4827. occulto2 (1)
4828. battum (1)
4829. rependo (1)
4830. munychios (1)
4831. lycei (1)
4832. miluus (1)
4833. agilis (1)
4834. circino (1)
4835. adiuuo (1)
4836. tersis (1)
4837. testudo (1)
4838. pandrose (1)
4839. mercurium (1)
4840. pleio (1)
4841. aegida (1)
4842. lemnicolae (1)
4843. abundo (1)
4844. semesaris (1)
4845. recta (1)
4846. robigo (1)
4847. felle (1)
4848. successus (1)
4849. spinea (1)
4850. protero (1)
4851. sentis1 (1)
4852. fortuno (1)
4853. herses (1)
4854. spinosus (1)
4855. excludo (1)
4856. blandimentum (1)
4857. respiramen (1)
4858. seuocat (1)
4859. sidonida (1)
4860. tyriis (1)
4861. trisulcus (1)
4862. inpedienda (1)
4863. dorsum (1)
4864. dictaea (1)
4865. cadmo (1)
4866. perquiro (1)
4867. castalio (1)
4868. seruitii (1)
4869. subsequor (1)
4870. cephisi (1)
4871. panopes (1)
4872. conpagibus (1)
4873. infaustus (1)
4874. squamosus (1)
4875. vestigo (1)
4876. tegume (1)
4877. splendeo (1)
4878. letata (1)
4879. tergoris (1)
4880. molaris (1)
4881. albi

6327. iphide (1)
6328. cnosiaco (1)
6329. phaestia (1)
6330. ligdum (1)
6331. ingenuus (1)
6332. uoueam (1)
6333. releuere (1)
6334. abominor (1)
6335. ligdo (1)
6336. latrator (1)
6337. a-nubo (1)
6338. bubastis (1)
6339. osiris (1)
6340. cressa (1)
6341. al (1)
6342. despondeo (1)
6343. phaestiadas (1)
6344. laudatus (1)
6345. teleste (1)
6346. magister (1)
6347. confluo (1)
6348. optabilis (1)
6349. causor (1)
6350. isi (1)
6351. paraetonium (1)
6352. mareotica (1)
6353. pharon (1)
6354. punior (1)
6355. iuua (1)
6356. crepo (1)
6357. sonabilis (1)
6358. incomptus (1)
6359. sollemne (1)
6360. stridulus (1)
6361. gravis (1)
6362. tal (1)
6363. de_-fleo (1)
6364. taenaria (1)
6365. inamoenus (1)
6366. uincirem (1)
6367. vipera (1)
6368. eurydices (1)
6369. retexite (1)
6370. refugus (1)
6371. uacarunt (1)
6372. uictaris (1)
6373. auernas (1)
6374. olenos (1)
6375. lethaea (1)
6376. portitor (1)
6377. chaonis (1)
6378. asum (1)
6379. aesculus (1)
6380. corylus (1)
6381. enodis (1)
6382

In [21]:
with open("data/met_counts/met_lemmas.txt", 'w') as f:
    for k,v in  met_lemmas_counter.most_common():
        f.write( "{} {}\n".format(k,v))