In [1]:
# In this notebook, I want to check some figures from Ceccarelli.
# Ceccarelli, L. (2008). Contributi per la storia dell'esametro latino.
# In particular Tab. 21 in Vol. 2, 46.
#
# Our figures disagree wildly on the use of Bucolic Diaeresis.
# 
# In his tables, he breaks the verses of the Aeneid down as
# 4_|  : 7376  (strong caes.)
# 4__| : 1324  (BD)
# 4_xx|: 495   (BD)
# 4_x| : 391   (weak caes.)

In [10]:
from bs4 import BeautifulSoup
import importlib
from mqdq import line_analyzer as la
from mqdq import utils
from collections import Counter

# Parse the MQDQ XML scansion of the Aeneid (path is relative to this notebook).
with open('../VERG-aene.xml') as fh:
    aen_soup = BeautifulSoup(fh, "xml")

# `aen` holds the cleaned <line> elements of the whole poem; `aen_books`
# holds the same thing once per <division>.
# NOTE(review): what utils.clean removes is not visible from here -- confirm.
aen = utils.clean(aen_soup.find_all('line'))
aen_books = [
    utils.clean(book.find_all('line'))
    for book in aen_soup.find_all('division')
]

In [11]:
# I already have a method in line_analyzer for this,
# which is mostly just used inside the caesura_counter.

# The weak caesurae almost match C's numbers...

# Tally the 4th-foot caesura label of every line in the poem.
Counter(la.classify_caesura(line, 4, strict=False) for line in aen)

Counter({'S': 6450, '-': 2064, 'Q': 945, 'W': 381})

In [12]:
# What I call a 'quasi' caesura is when there would have been
# a 'real' caesura, but there is an elision that carries over
# the word break. These seem to be considered by C as a strong
# caesura, which is... odd?

6450+945

7395

In [13]:
# But C's two 'true' BD cases don't add up to my '-' caesura
# which is marked when the foot has no caesura at all.

1324+495

1819

In [14]:
# Anyway, my own counters for caesurae and my BD analyzer don't add up...
# 4844 is more than my '-' and 'Q' caesurae combined.

# Tally the has_bd result for every line in the poem.
Counter(map(la.has_bd, aen))

Counter({True: 4844, False: 4996})

In [52]:
# So, obviously we have a discrepancy. Let's just analyse a small random sample.

import random
# Fix the seed before drawing, so the draw below starts from a known state.
random.seed(1)
# Draw 100 distinct lines from the whole poem.
s = random.sample(aen, 100)
# Display the first four sampled lines as raw XML.
s[:4]

[<line metre="H" name="662" pattern="SDSD">
 <word mf="SY" sy="1A">Postquam</word>
 <word sy="1T2A" wb="CM">altos</word>
 <word sy="2b2c3A" wb="CM">tetigit</word>
 <word sy="3T4A" wb="CM">fluctus</word>
 <word sy="4b" wb="CF">et</word>
 <word sy="4c" wb="DI">ad</word>
 <word sy="5A5b5c" wb="DI">aequora</word>
 <word sy="6A6X">uenit,</word>
 </line>, <line metre="H" name="437" pattern="SDSS">
 <word sy="1A1T2A" wb="CM">Defensum</word>
 <word sy="2b2c" wb="DI">dabit</word>
 <word sy="3A" wb="CM">et</word>
 <word mf="SY" sy="3T">magna</word>
 <word sy="4A4T" wb="DI">inter</word>
 <word sy="5A5b5c" wb="DI">praemia</word>
 <word sy="6A6X">ducet.</word>
 </line>, <line metre="H" name="283" pattern="SDSS">
 <word sy="1A1T2A2b" wb="CF">Exspectate</word>
 <word sy="2c3A" wb="CM">uenis?</word>
 <word sy="3T" wb="DI">ut</word>
 <word sy="4A" wb="CM">te</word>
 <word sy="4T" wb="DI">post</word>
 <word sy="5A5b" wb="CF">multa</word>
 <word sy="5c6A6X">tuorum</word>
 </line>, <line metre="H" name="3

In [53]:
# Same 4th-foot tally as for the full poem, restricted to the random sample.
Counter(la.classify_caesura(line, 4, strict=False) for line in s)

Counter({'S': 67, '-': 20, 'Q': 9, 'W': 4})

In [54]:
# Looking below at how my analysis works, we can now see the problem.
# The first line has a word break after the arsis of the
# fourth foot, which is the definition of a strong caesura
# but ALSO a word break at the end of the fourth foot, which
# is the definition of a bucolic diaeresis. Same with the third
# and fourth lines, so I guess this happens a lot.
#
# Well, there's our problem, but whose interpretation is
# correct? Well, that's another matter entirely, and more a
# matter of interpreting the literature. I claim they should be
# counted as both BD and SC, but I'm not sure about the very first line---
# does it have all three features (BD, SC, WC)?? (technically, yes...)

# One entry per sampled line: the scanned text, then the 4th-foot caesura
# label and the has_bd result, with entries separated by a blank line.
print("\n\n".join(
    "".join((
        utils.txt(line, scan=True),
        ' ----> ',
        la.classify_caesura(line, 4, strict=False),
        ", ",
        str(la.has_bd(line)),
    ))
    for line in s[:10]
))

Postquam altos tetigit fluctus et ad aequora uenit,
`1A_     `1T2A `2b2c3A `3T4A   4b 4c `5A5b5c `6A6X ----> S, True

Defensum dabit et magna inter praemia ducet.
1A`1T2A  `2b2c 3A `3T_  `4A4T `5A5b5c `6A6X ----> -, True

Exspectate uenis? ut te post multa tuorum
1A1T`2A2b  `2c3A  3T 4A 4T   `5A5b 5c`6A6X ----> S, True

Paulatim adnabam terrae; iam tuta  tenebam,
1A`1T_   2A`2T3A `3T4A   4T  `5A5b 5c`6A6X ----> S, True

Signa tibi  dicam, tu condita mente teneto:
`1A1b `1c2A `2T3A  3T `4A4b4c `5A5b 5c`6A6X ----> -, True

Funereas  rapuere   faces; lucet uia   longo
1A`1b1c2A 2b2c`3A3b `3c4A  `4T5A `5b5c `6A6X ----> S, False

"Nunc, o  lecta manus, ualidis incumbite remis;
1A     1T `2A2b `2c3A  `3b3c4A 4T`5A5b5c `6A6X ----> S, False

Duxisti et talis uoluisti expendere poenas?
1A`1T_  2A `2T3A 3b3c`4A_ 4T`5A5b5c `6A6X ----> Q, False

Conuulsum remis rostrisque tridentibus aequor.
1A`1T2A   `2T3A 3T`4A4b    4c`5A5b5c   `6A6X ----> W, False

Persoluo; hic uictor caestus artemque repono."

In [24]:
# There's one more problem. C's numbers don't add up. He uses 
# 9830 lines for Aen. (I use 9840, per MQDQ scansion), but his
# 4th feet only add up to 9586.

# The four 4th-foot rows quoted from Ceccarelli's Tab. 21 at the top of the notebook.
tab21_counts = (7376, 1324, 495, 391)
tab21_total = sum(tab21_counts)

print(tab21_total)
# Shortfall against the 9830 lines he reports for the Aeneid.
print(9830 - tab21_total)

9586
244


In [25]:
# So, there should be lines that have neither a 4th foot caesura, NOR
# BD. How many are there?

# Keep the lines whose 4th foot is labelled '-' when strict=True.
no_4th_caes = [
    line for line in aen
    if la.classify_caesura(line, 4, strict=True) == '-'
]

In [26]:
# With strict=True the caesura is marked as '-' if the foot is completely contained in a word
# OR if its only word break falls across an elision (what I call 'Q' above).
len(no_4th_caes)

3009

In [27]:
# So these should now be the lines with neither,
# so the fourth foot should be wrapped in a monster
# word or something.

# Of those, keep only the lines that has_bd rejects as well.
neither = [line for line in no_4th_caes if not la.has_bd(line)]

In [28]:
len(neither)

898

In [31]:
# And here's a sample of them. Mostly the elision type
# but we can see one at the end of the 'monster word'
# variety ('tempestatumque')

utils.blat(neither[:10])

Ostia,  diues opum  studiisque asperrima belli;
`1A1b1c `2A2b `2c3A 3b3c`4A_   4T`5A5b5c `6A6X

Iudicium  Paridis spretaeque iniuria   formae
1A`1b1c2A `2b2c3A 3T`4A_     4T`5A5b5c `6A6X

Troas, reliquias Danaum atque immitis Achilli,
`1A1T  2A`2b2c3A `3b3c_ `4A_  4T`5A5b 5c`6A6X

Nec posse Italia    Teucrorum auertere  regem?
1A  `1T_  2A`2b2c3A 3T`4A_    4T`5A5b5c `6A6X

Quippe uetor fatis. Pallasne exurere   classem
`1A1b  `1c2A `2T3A  3T`4A_   4T`5A5b5c `6A6X

Disiecitque rates euertitque aequora uentis,
1A1T`2A2b   `2c3A 3T4A`4T_   `5A5b5c `6A6X

Turbine corripuit scopuloque infixit acuto;
`1A1b1c 2A`2b2c3A 3b3c`4A_   4T`5A5b 5c`6A6X

Luctantis uentos tempestatesque sonoras
1A`1T2A   `2T3A  3T4A4T`5A5b    5c`6A6X

Incute  uim uentis submersasque obrue   puppes,
`1A1b1c 2A  `2T3A  3T4A`4T_     `5A5b5c `6A6X

Nimborumque facis tempestatumque potentem."
1A1T`2A2b   `2c3A 3T4A4T`5A5b    5c`6A6X


In [32]:
# Could the lines where the fourth foot is completely
# contained in a word, and doesn't end that word, account for
# C's missing 244 verses? No, there are actually more
# lines of that kind.

# NB: this passes strict=False, so '-' here means no break at all in the
# 4th foot (a break across an elision is reported as 'Q' instead).
# Presumably the "_strict" suffix refers to that narrower reading rather
# than to the value of the strict= argument.
no_4th_caes_strict = [
    line for line in aen
    if la.classify_caesura(line, 4, strict=False) == '-'
]
neither_strict = [line for line in no_4th_caes_strict if not la.has_bd(line)]
len(neither_strict)

307

In [33]:
# And here's a sample of them.

utils.blat(neither_strict[:10])

Disiecitque rates euertitque aequora uentis,
1A1T`2A2b   `2c3A 3T4A`4T_   `5A5b5c `6A6X

Luctantis uentos tempestatesque sonoras
1A`1T2A   `2T3A  3T4A4T`5A5b    5c`6A6X

Incute  uim uentis submersasque obrue   puppes,
`1A1b1c 2A  `2T3A  3T4A`4T_     `5A5b5c `6A6X

Nimborumque facis tempestatumque potentem."
1A1T`2A2b   `2c3A 3T4A4T`5A5b    5c`6A6X

Conspexere, silent arrectisque auribus adstant;
1A1T`2A2b   `2c3A  3T4A`4T_    `5A5b5c `6A6X

Siue extrema pati  nec exaudire  uocatos.
`1A_ 1T`2A2b `2c3A 3T  4A4T`5A5b 5c`6A6X

Vultu, quo caelum tempestatesque serenat,
`1A1T  2A  `2T3A  3T4A4T`5A5b    5c`6A6X

Sed fines Libyci, genus intractabile bello.
1A  `1T2A `2b2c3A `3b3c 4A4T`5A5b5c  `6A6X

Passa Venus medio   sic interfata dolore  est:
`1A1b `1c2A `2b2c3A 3T  4A4T`5A5b 5c`6A6X _

Aut capere aut captas iam despectare uidentur:
1A  `1b1c_ 2A  `2T3A  3T  4A4T`5A5b  5c`6A6X


In [34]:
# But note that there's a gap between C's 1819 BD lines and my '-'
# caesura of just the right amount. It looks like those lines
# are all the ones that aren't included.

print(1819+244)
print(Counter([la.classify_caesura(l,4,strict=False) for l in aen])['-'])

2063
2064


In [39]:
# So the missing lines have (stricto sensu) no caesura in the 4th foot
# (i.e. not even with elision) and also no BD

# These are the same two filters used for no_4th_caes_strict and
# neither_strict in the earlier cell, recomputed under shorter names.
no4th = [
    line for line in aen
    if la.classify_caesura(line, 4, strict=False) == '-'
]
no4th_nobd = [line for line in no4th if not la.has_bd(line)]
len(no4th_nobd)

307

In [40]:
utils.blat(random.sample(no4th_nobd,10))

Si qua piis  animis  manet infelicis Amatae
1A 1b  `1c2A `2b2c3A `3b3c 4A4T`5A5b 5c`6A6X

Sola  sibi, semper longam incomitata  uidetur
`1A1b `1c2A `2T3A  `3T_   4A4b4c`5A5b 5c`6A6X

Quam cladem miserae postquam accepere  Latinae,
1A   `1T2A  `2b2c3A `3T_     4A4T`5A5b 5c`6A6X

"Venisti tandem, tuaque exspectata parenti
1A`1T2A  `2T3A   `3b3c_ 4A4T`5A5b  5c`6A6X

Dat Niso  Mnestheus pellem horrentisque leonis
1A  `1T2A `2T3A     `3T_   4A4T`5A5b    5c`6A6X

Certatim sese  Rutuli exhortantur in arma.
1A`1T2A  `2T3A `3b3c_ 4A4T`5A5b   5c `6A6X

Vertice attollens pater Appenninus ad auras.
`1A1b1c 2A`2T3A   `3b3c 4A4T`5A5b  5c `6A6X

Obuius  ire   parat. manet imperterritus ille
`1A1b1c `2A2b `2c3A  `3b3c 4A4T`5A5b5c   `6A6X

Pendentem scopulo Furiarumque ora   trementem,
1A`1T2A   `2b2c3A 3b3c4A`4T_  `5A5b 5c`6A6X

Si mihi  non animo   fixum immotumque sederet
1A `1b1c 2A  `2b2c3A `3T_  4A4T`5A5b  5c`6A6X


In [41]:
# 307 is almost the number we want, and looking at the sample, I think I know
# what's going on, now.

# Let's hack up a new function, I haven't needed this in line_analyzer yet...

import re
def elision_after_foot(n, l):
    """Report whether the word carrying the end of foot `n` is flagged mf="SY".

    The <word> children of `l` are scanned in order. The first word whose
    `sy` attribute contains ``<n>T`` or ``<n>c`` decides the answer; later
    words are never inspected.

    NOTE(review): the pattern is not anchored to the end of `sy`, so a word
    that contains ``<n>T``/``<n>c`` and then runs on into the next foot is
    also treated as the deciding word -- confirm that this is intended.

    Args:
        n: Foot number (formatted into the pattern with ``%d``).
        l: A line element that exposes a ``pattern`` attribute via
            ``l['pattern']`` and its words via ``l('word')``.

    Returns:
        True if the deciding word has an ``mf`` attribute equal to ``'SY'``;
        False if it does not, or if no word matches at all.

    Raises:
        ValueError: With the message "Can't calculate conflicts on a corrupt
            line!" when the line's pattern is ``'corrupt'``; with "Can't
            handle this: ..." when the line cannot be processed at all
            (e.g. a missing attribute or an `n` that ``%d`` cannot format).
    """
    try:
        is_corrupt = l['pattern'] == 'corrupt'
    except Exception as err:
        raise ValueError("Can't handle this: %s" % l) from err

    # Raised outside any try block so that this specific message reaches the
    # caller instead of being replaced by the generic one.
    if is_corrupt:
        raise ValueError("Can't calculate conflicts on a corrupt line!")

    try:
        foot_end = re.compile('%d[Tc]' % n)
        for w in l('word'):
            if foot_end.search(w['sy']):
                return bool(w.has_attr('mf') and w['mf'] == 'SY')
        return False
    except Exception as err:
        # Only ordinary errors are wrapped; KeyboardInterrupt and SystemExit
        # are allowed to propagate untouched.
        raise ValueError("Can't handle this: %s" % l) from err
        

In [42]:
# Ah! There are 241 lines which have no 4th foot caesura,
# no BD (by my method) and no elision after the 4th foot.
# Another way of saying that is that the 4th foot is entirely
# contained in a word.
#
# It looks like these are the ones that are missing entirely
# from Ceccarelli.

# From the no-caesura / no-BD lines, set aside the ones for which
# elision_after_foot(4, ...) is False.
skipped = [line for line in no4th_nobd if not elision_after_foot(4, line)]

print(len(skipped))
print()
# Show five of them, picked at random.
utils.blat(random.sample(skipped, 5))

241

Vitaque cum gemitu  fugit indignata sub umbras.
`1A1b1c 2A  `2b2c3A `3b3c 4A4T`5A5b 5c  `6A6X

Alcandrumque Haliumque Noemonaque  Prytanimque.
1A1T`2A_     2b2c`3A3b 3c4A`4b4c5A 5b5c`6A6X

Qui fuit  in Teucris et seruantissimus aequi
1A  `1b1c 2A `2T3A   3T 4A4T`5A5b5c    `6A6X

Aut hoc inclusi ligno occultantur Achiui,
1A  1T  2A`2T3A `3T_  4A4T`5A5b   5c`6A6X

Parthus siue  Cydon, telum immedicabile, torsit,
`1A1T   `2A2b `2c3A  `3T_  4A4b4c`5A5b5c `6A6X


In [43]:
# Now I think I know enough to reverse engineer C's algorithm:
#
# - Lines in 'skipped' (as above, with no 4th foot caesura, no BD etc etc) are
#   not counted at all.
# - Lines are then classified according to the break in the fourth foot
#   BUT, caesurae are calculated before BD (if a line has both, it will
#   be counted as the caesura)
# - Elision is ignored (!!)

def ceccarelli(l, skip=None):
    """Label a line by its fourth-foot word break, following the reverse-engineered rules.

    Lines found in `skip` are reported as ``'skip'``. Otherwise the <word>
    children are scanned in order and the first word that matches one of the
    patterns below decides the label, so a break after 4A or 4b earlier in
    the line wins over a break at the end of the foot. The `mf` attribute is
    never consulted, i.e. elision plays no part in the label.

    Labels (paired with the table rows they are printed against in this
    notebook):
        '4A'  word ends in 4A          (4_|)
        '4b'  word ends in 4b          (4_x|)
        '4c'  word ends in 4c          (4_xx|)
        '4T'  word contains 4T         (4__|)
        '-'   no word matched
        'skip' line is in `skip`

    NOTE(review): the '4T' pattern is not anchored to the end of `sy`, so a
    word that contains 4T and runs on into the fifth foot is also labelled
    '4T' -- confirm that this is intended.

    Args:
        l: A line element that exposes a ``pattern`` attribute via
            ``l['pattern']`` and its words via ``l('word')``.
        skip: Optional collection of lines to report as ``'skip'``. When
            omitted, the notebook-level ``skipped`` list is used, which is
            what the original one-argument call relied on.

    Returns:
        One of ``'skip'``, ``'4A'``, ``'4b'``, ``'4c'``, ``'4T'``, ``'-'``.

    Raises:
        ValueError: With the message "Can't calculate conflicts on a corrupt
            line!" when the line's pattern is ``'corrupt'``; with "Can't
            handle this: ..." when the line cannot be processed at all.
    """
    try:
        is_corrupt = l['pattern'] == 'corrupt'
    except Exception as err:
        raise ValueError("Can't handle this: %s" % l) from err

    # Raised outside any try block so that this specific message reaches the
    # caller instead of being replaced by the generic one.
    if is_corrupt:
        raise ValueError("Can't calculate conflicts on a corrupt line!")

    if skip is None:
        # Fall back to the list built in the preceding cell.
        skip = skipped

    # Checked in this order for each word; the first hit decides the label.
    breaks = (
        ('4A$', '4A'),
        ('4b$', '4b'),
        ('4c$', '4c'),
        ('4T', '4T'),
    )

    try:
        if l in skip:
            return 'skip'

        for w in l('word'):
            sy = w['sy']
            for pattern, label in breaks:
                if re.search(pattern, sy):
                    return label

        return '-'
    except Exception as err:
        # Only ordinary errors are wrapped; KeyboardInterrupt and SystemExit
        # are allowed to propagate untouched.
        raise ValueError("Can't handle this: %s" % l) from err

In [44]:
# Recall the real numbers:
# 4_|  : 7376  (strong caes.)
# 4__| : 1324  (BD)
# 4_xx|: 495   (BD)
# 4_x| : 391   (weak caes.)

# Label every line with the reverse-engineered rules and tally the labels.
c = Counter(ceccarelli(line) for line in aen)

# Printed in the same row order as the figures quoted above.
print(
    "Reverse engineered Ceccarelli:\n4_|  : %d\n4__| : %d\n4_xx|: %d\n4_x| : %d\n"
    % (c['4A'], c['4T'], c['4c'], c['4b'])
)

Reverse engineered Ceccarelli:
4_|  : 7384
4__| : 1328
4_xx|: 493
4_x| : 392



In [None]:
# So there we go. A mini replication study and mystery novel rolled into one.