## Simple gemination

Citations: Wills 1996, 45-49

In [1]:
from collections import Counter
from pathlib import Path
from pprint import pprint
from random import Random, sample

from nltk import bigrams

from cltk.corpus.readers import get_corpus_reader
from cltk.tokenize.latin.sentence import SentenceTokenizer

from latin_tools import preprocess

In [2]:
# Load the Tesserae Latin corpus reader, list its file ids and request their texts

tesserae = get_corpus_reader(
    language='latin',
    corpus_name='latin_text_tesserae',
)
files = list(tesserae.fileids())
texts = tesserae.texts(files)

In [3]:
# Set up tools: a single CLTK Latin sentence tokenizer, reused for the Eclogues
# example and for the full-corpus pass.
# NOTE(review): the effect of strict=True is not visible in this notebook — confirm against the CLTK docs.

sent_tokenizer = SentenceTokenizer(strict=True)

### Simple gemination example: V. *Ecl.* 2.69

In [4]:
# Keep only the corpus file ids whose name contains 'vergil.ecl'
ecl_files = [fileid for fileid in files if 'vergil.ecl' in fileid]

In [5]:
# texts() is consumed with next(), so only the first text it yields for the
# matching file id(s) is kept.
ecl_text = next(tesserae.texts(ecl_files))

In [6]:
# TODO: add a poetry formatting parameter to the sentence tokenizer?
# Until then, mark every line break inside a sentence with " / ".
ecl_sents = [sentence.replace('\n', ' / ') for sentence in sent_tokenizer.tokenize(ecl_text)]

In [7]:
# Preview the first three sentences, numbered from 1
for number, sentence in enumerate(ecl_sents[:3], start=1):
    print(f'{number}: {sentence}')

1: Tityre, tu patulae recubans sub tegmine fagi / silvestrem tenui Musam meditaris avena;
2: nos patriae fines et dulcia linquimus arva:
3: nos patriam fugimus;


In [8]:
# TODO: move strip() into the preprocess function

ecl_sents_pp = [preprocess(sentence).strip() for sentence in ecl_sents]

In [9]:
# Whitespace-tokenize each preprocessed sentence.
# NOTE(review): not referenced by the later cells shown here — the bigram cell
# re-splits ecl_sents_pp itself.
ecl_sents_pp_tokens = [sent.split() for sent in ecl_sents_pp]

In [10]:
# Adjacent-token pairs for every preprocessed sentence
ecl_sents_pp_bigrams = [list(bigrams(tokens)) for tokens in map(str.split, ecl_sents_pp)]

In [11]:
# One list per sentence, holding every bigram whose two tokens are identical
# (empty list when the sentence has no immediate repetition).
matches = [
    [pair for pair in sent_bigrams if pair[0] == pair[1]]
    for sent_bigrams in ecl_sents_pp_bigrams
]

In [12]:
# Print each repeated pair with its 1-based sentence number and the original sentence.
# Sentences without hits have an empty list, so the inner loop simply does nothing.
for sent_no, sent_matches in enumerate(matches, start=1):
    for pair in sent_matches:
        print(f'{sent_no}: {" ".join(pair)}\n{ecl_sents[sent_no - 1]}', '\n')

92: heu heu
Heu, heu, quid volui misero mihi! 

106: corydon corydon
Ah, Corydon, Corydon, quae te dementia cepit! 

170: uale uale
nam me discedere flevit, / et longum “formose, vale, vale,” inquit, “Iolla.” / Triste lupus stabulis, maturis frugibus imbres. 

188: heu heu
Heu, heu, quam pingui macer est mihi taurus in ervo! 

370: corydon corydon
ex illo Corydon Corydon est tempore nobis. 

413: orpheus orpheus
sit Tityrus Orpheus, / Orpheus in silvis, inter delphinas Arion. 

488: astrum astrum
Ecce Dionaei processit Caesaris astrum, / astrum, quo segetes gauderent frugibus, et quo / duceret apricis in collibus uva colorem. 



### Simple gemination in Latin literature

In [13]:
# Request the texts of all corpus files again for the full-corpus pass.
# NOTE(review): presumably texts() returns a one-shot iterator (it is consumed
# with next() in the Eclogues example) — confirm.
texts = tesserae.texts(files)

In [14]:
# Sentence-split every text; line breaks inside a sentence (poetry) become spaces
sents = [
    [sentence.replace('\n', ' ') for sentence in sent_tokenizer.tokenize(text)]
    for text in texts
]
# Preprocess each sentence, text by text
sents_pp = [
    [preprocess(sentence) for sentence in text_sents]
    for text_sents in sents
]
# Adjacent-token pairs for each preprocessed sentence
sents_pp_bigrams = [
    [list(bigrams(sentence.split())) for sentence in text_sents]
    for text_sents in sents_pp
]

In [15]:
# text_matches[t][s] lists the bigrams of sentence s in text t whose two tokens
# are identical (empty list when there are none).
text_matches = [
    [
        [pair for pair in sent_bigrams if pair[0] == pair[1]]
        for sent_bigrams in text_bigrams
    ]
    for text_bigrams in sents_pp_bigrams
]

In [16]:
# Flatten the hits into (file id, 1-based sentence number, repeated pair, original sentence)
examples = [
    (files[text_idx], sent_no, " ".join(pair), sents[text_idx][sent_no - 1])
    for text_idx, text_hits in enumerate(text_matches)
    for sent_no, sent_hits in enumerate(text_hits, start=1)
    for pair in sent_hits
]

In [17]:
# Print a random sample of hits for eyeballing.
# - A dedicated, seeded Random instance makes the sample reproducible across
#   Restart & Run All.
# - The sample size is capped at len(examples): random.sample raises ValueError
#   when asked for more items than the population holds.
SAMPLE_SIZE = 25
SAMPLE_SEED = 42
sample_rng = Random(SAMPLE_SEED)

for example in sample_rng.sample(examples, min(SAMPLE_SIZE, len(examples))):
    print(f'{example[0]} sent. {example[1]}\n{example[2]}\n{example[3]}','\n')

texts/terence.hecyra.tess sent. 221
uestro uestro
Ubi duxere impulsu vestro, vestro impulsu easdem exigunt. 

texts/polignac.antilucretius.tess sent. 1132
se se
SIC pulso semper locus est, Spatiumque liquori,  Quo se se, prisca eiectus statione, receptet. 

texts/jerome.vulgate.part.21.psalms.tess sent. 1007
deus deus
Deus Deus meus ad te de luce vigilo sitivit in te anima mea quam multipliciter tibi caro mea; 

texts/seneca.medea.tess sent. 481
iam iam
Iam iam recepi sceptra germanum patrem, spoliumque Colchi pecudis auratae tenent; 

texts/jerome.vulgate.part.21a.old_latin_psalms.tess sent. 1967
benedicet benedicet
Dominus recordatus nostri benedicet benedicet domui Israhel benedicet domui Aaron; 

texts/jerome.vulgate.part.21a.old_latin_psalms.tess sent. 2019
obsecro obsecro
obsecro Domine salva obsecro obsecro Domine prosperare obsecro; 

texts/silius_italicus.punica.part.1.tess sent. 80
atlas atlas
ultra obsidet aequor, nec patitur nomen proferri longius Atlas, Atlas subducto trac

In [18]:
# Write every hit to disk: file id, sentence number, repeated pair, original sentence.
results_path = Path('results/simple-gemination.txt')
# Create results/ if it is missing so the cell does not fail with FileNotFoundError.
results_path.parent.mkdir(parents=True, exist_ok=True)

# Explicit UTF-8: the sentences contain non-ASCII characters (e.g. curly quotes),
# and the platform default encoding is not guaranteed to handle them.
with open(results_path, 'w', encoding='utf-8') as f:
    for example in examples:
        f.write(f'{example[0]}\nsentence {example[1]}\n{example[2]}\n{example[3]}\n\n')

In [19]:
# Collect the repeated-pair string (third field) of every example.
# Built directly instead of via `_, _, gems, _ = zip(*examples)`: that form raises
# ValueError when `examples` is empty, and it rebinds `_`, which IPython uses for
# the previous cell's output. The result is still a tuple.
gems = tuple(example[2] for example in examples)

In [20]:
# Show the 25 most frequent repeated pairs across the corpus.
# NOTE(review): the single-letter pairs in the output ('a a', 'x x', 'c c', ...) are
# presumably abbreviations or numerals rather than rhetorical gemination — consider
# filtering them before counting.
print(Counter(gems).most_common(25))

[('iam iam', 67), ('se se', 56), ('a a', 53), ('x x', 39), ('e e', 34), ('amen amen', 31), ('deus deus', 29), ('c c', 27), ('nunc nunc', 26), ('heu heu', 26), ('te te', 21), ('est est', 18), ('iterumque iterumque', 18), ('ut ut', 17), ('i i', 17), ('non non', 16), ('me me', 16), ('modo modo', 16), ('age age', 14), ('ille ille', 14), ('sanctus sanctus', 14), ('domine domine', 14), ('mane mane', 14), ('r r', 14), ('quam quam', 13)]


### Bibliography

- Wills, J. 1996. *Repetition in Latin Poetry: Figures of Allusion*. Clarendon Press.