In [None]:
# In this notebook, I want to check some figures from Ceccarelli.
# Ceccarelli, L. (2008). Contributi per la storia dell'esametro latino.
# Our figures disagree wildly on the use of Bucolic Diaeresis.
# 
# In his tables, he breaks the verses of the Aeneid down as
# 4_|  : 7376  (strong caes.)
# 4__| : 1324  (BD)
# 4_xx|: 495   (BD)
# 4_x| : 391   (weak caes.)

In [3]:
from bs4 import BeautifulSoup
import importlib
from mqdq import counter_factory as cf
from mqdq import line_analyzer as la
from mqdq import metrics
from collections import Counter

# Open the corpus in binary mode: BeautifulSoup then decodes it from the
# document's own encoding information rather than from whatever text
# encoding the platform happens to default to.
with open('../VERG-aene.xml', 'rb') as fh:
    aen_soup = BeautifulSoup(fh, "xml")

# Every <line> element whose 'pattern' attribute is not 'corrupt'.
aen = [l for l in aen_soup('line') if l['pattern'] != 'corrupt']
# The same filter applied inside each <division> element
# (presumably one division per book -- TODO confirm against the XML).
aen_books = [[l for l in d('line') if l['pattern'] != 'corrupt'] for d in aen_soup('division')]

In [4]:
# line_analyzer already has a classifier for this; it is mostly
# used from inside the caesura_counter.
#
# The weak-caesura count comes out close to C's figure...

Counter(la.classify_caesura(verse, 4, strict=False) for verse in aen)

Counter({'S': 6450, '-': 2064, 'Q': 945, 'W': 381})

In [21]:
# What I call a 'quasi' caesura is when there would have been
# a 'real' caesura, but there is an elision that carries over
# the word break. These seem to be considered by C as a strong
# caesura, which is... odd?
#
# The operands are my 'S' (6450) and 'Q' (945) counts from the
# previous output; the sum is to be compared with C's 7376.

6450+945

7395

In [22]:
# But C's two 'true' BD rows (1324 and 495) don't add up to my '-'
# caesura count (2064), which is marked when the foot has no caesura at all.

1324+495

1819

In [7]:
# My caesura counters and my BD analyzer do not reconcile either:
# 4844 BD lines is more than my '-' and 'Q' caesurae put together.

Counter(la.has_bd(verse) for verse in aen)

Counter({True: 4844, False: 4996})

In [8]:
# So, obviously we have a discrepancy. Let's just analyse a small random sample.

import random

# Draw the sample from a dedicated, seeded generator so that re-running the
# notebook examines the same 100 lines every time; the commentary further
# down discusses individual sampled lines and only makes sense for a fixed
# sample.
# NOTE(review): the outputs currently stored in this notebook came from an
# unseeded run, so they will change once on the next execution.
SAMPLE_SEED = 2008
s = random.Random(SAMPLE_SEED).sample(aen, 100)
s[:4]

[<line metre="H" name="263" pattern="DSSS">
 <word sy="1A1b" wb="CF">Ipse</word>
 <word mf="SY" sy="1c">modo</word>
 <word sy="2A2T3A" wb="CM">Aeneas,</word>
 <word sy="3T4A" wb="CM">nostri</word>
 <word sy="4T" wb="DI">si</word>
 <word sy="5A5b" wb="CF">tanta</word>
 <word sy="5c6A6X">cupidost,</word>
 </line>, <line metre="H" name="494" pattern="DSSS">
 <word sy="1A1b1c" wb="DI">Largior.</word>
 <word sy="2A" wb="CM">haud</word>
 <word sy="2T3A" wb="CM">illi</word>
 <word sy="3T4A" wb="CM">stabunt</word>
 <word sy="4T5A5b5c" wb="DI">Aeneia</word>
 <word sy="6A6X">paruo</word>
 </line>, <line metre="H" name="324" pattern="DSSS">
 <word mf="SY" sy="1A1b1c">Luctificam</word>
 <word sy="2A2T3A" wb="CM">Allecto</word>
 <word mf="SY" sy="3T4A">dirarum</word>
 <word sy="4T" wb="DI">ab</word>
 <word sy="5A5b" wb="CF">sede</word>
 <word sy="5c6A6X">dearum</word>
 </line>, <line metre="H" name="602" pattern="SSDD">
 <word sy="1A" wb="CM">Qui</word>
 <word sy="1T2A" wb="CM">primi</word>
 <word 

In [9]:
# Fourth-foot caesura classes within the random sample.
Counter(la.classify_caesura(verse, 4, strict=False) for verse in s)

Counter({'S': 62, 'Q': 12, 'W': 5, '-': 21})

In [20]:
# The way my analysis works shows where the discrepancy comes from.
# The first line has a word break after the thesis of the
# fourth foot, which is the definition of a strong caesura,
# but ALSO a word break at the end of the fourth foot, which
# is the definition of a bucolic diaeresis -- so it is reported
# both as 'S' and as having a BD.
#
# That is the source of the problem; whose interpretation is
# correct is another matter entirely.

# One row per sampled line: scanned text, caesura class, BD flag.
print("\n".join(
    la.txt(verse, scan=True)
    + '    > '
    + la.classify_caesura(verse, 4, strict=False)
    + "  "
    + str(la.has_bd(verse))
    for verse in s[:10]
))

Ipse modo Aeneas, nostri si tanta cupidost,
1A1b 1c_  2A2T3A  3T4A   4T 5A5b  5c6A6X    > S  True
Largior. haud illi stabunt Aeneia   paruo
1A1b1c   2A   2T3A 3T4A    4T5A5b5c 6A6X    > S  False
Luctificam Allecto dirarum ab sede dearum
1A1b1c_    2A2T3A  3T4A_   4T 5A5b 5c6A6X    > Q  True
Qui primi finis aliquando habuere  Latinos.
1A  1T2A  2T3A  3b3c4A_   4b4c5A5b 5c6A6X    > Q  False
Effuge et haec Turno mandata nouissima perfer:
1A1b_  1c 2A   2T3A  3T4A4b  4c5A5b5c  6A6X    > W  False
Seruati facimus meritosque nouamus honores.
1A1T2A  2b2c3A  3b3c4A4b   4c5A5b  5c6A6X    > W  False
Dardaniumque ducem Laurentia uexerit arua,
1A1b1c2A2b   2c3A  3T4A4b4c  5A5b5c  6A6X    > -  True
Tum sic pauca refert: "ut te, fortissime Teucrum,
1A  1T  2A2b  2c3A    3T  4A  4T5A5b5c   6A6X    > S  False
At media  socios incedens naue per ipsos
1A 1b1c2A 2b2c3A 3T4A4T   5A5b 5c  6A6X    > -  True
Nec contra uiris audet Saturnia Iuno
1A  1T2A   2T3A  3T4A  4T5A5b5c 6A6X    > S  False


In [40]:
# One more problem: C's numbers don't add up. He uses
# 9830 lines for Aen. (I use 9840, per MQDQ scansion), but his
# four 4th-foot rows only sum to 9586.

# First the sum of C's four rows, then the shortfall against his line total.
print(sum((7376, 1324, 495, 391)))
print(9830 - sum((7376, 1324, 495, 391)))

9586
244


In [30]:
# If C's rows leave verses unaccounted for, there should be lines that have
# neither a 4th-foot caesura NOR a BD. How many are there?

no_4th_caes = [
    verse
    for verse in aen
    if la.classify_caesura(verse, 4, strict=True) == '-'
]

In [31]:
# With strict=True we mark the caesura as '-' if the feet are completely contained in a word
# OR over an elision.
# (The result matches the non-strict '-' and 'Q' counts combined: 2064 + 945.)
len(no_4th_caes)

3009

In [33]:
# Filtering out the BD lines should leave the lines with neither
# feature, i.e. the fourth foot is wrapped in a monster word
# or something similar.

neither = [verse for verse in no_4th_caes if not la.has_bd(verse)]

In [34]:
# Number of lines with neither a (strict) 4th-foot caesura nor a BD.
len(neither)

898

In [35]:
# And here's a sample of them. Mostly the elision type,
# but we can also see the 'monster word' variety
# ('tempestatesque', 'tempestatumque').

# Slice before formatting so only the ten displayed lines are rendered,
# not the whole list.
print("\n".join(la.txt(verse, scan=True) for verse in neither[:10]))

Ostia, diues opum studiisque asperrima belli;
1A1b1c 2A2b  2c3A 3b3c4A_    4T5A5b5c  6A6X
Iudicium Paridis spretaeque iniuria  formae
1A1b1c2A 2b2c3A  3T4A_      4T5A5b5c 6A6X
Troas, reliquias Danaum atque immitis Achilli,
1A1T   2A2b2c3A  3b3c_  4A_   4T5A5b  5c6A6X
Nec posse Italia   Teucrorum auertere regem?
1A  1T_   2A2b2c3A 3T4A_     4T5A5b5c 6A6X
Quippe uetor fatis. Pallasne exurere  classem
1A1b   1c2A  2T3A   3T4A_    4T5A5b5c 6A6X
Disiecitque rates euertitque aequora uentis,
1A1T2A2b    2c3A  3T4A4T_    5A5b5c  6A6X
Turbine corripuit scopuloque infixit acuto;
1A1b1c  2A2b2c3A  3b3c4A_    4T5A5b  5c6A6X
Luctantis uentos tempestatesque sonoras
1A1T2A    2T3A   3T4A4T5A5b     5c6A6X
Incute uim uentis submersasque obrue  puppes,
1A1b1c 2A  2T3A   3T4A4T_      5A5b5c 6A6X
Nimborumque facis tempestatumque potentem."
1A1T2A2b    2c3A  3T4A4T5A5b     5c6A6X


In [37]:
# Could the lines where the fourth foot is completely
# contained in a word, and doesn't end that word, account for
# C's missing 244 verses? No, there are actually more
# of that kind of line.
#
# Naming: these lists are built with strict=False, so they carry a
# '_nonstrict' suffix (the strict=True list is `no_4th_caes`).

no_4th_caes_nonstrict = [l for l in aen if la.classify_caesura(l, 4, strict=False) == '-']
neither_nonstrict = [l for l in no_4th_caes_nonstrict if not la.has_bd(l)]

# Aliases under the original (misleading) names, kept so that cells which
# still refer to them continue to work.
no_4th_caes_strict = no_4th_caes_nonstrict
neither_strict = neither_nonstrict

len(neither_nonstrict)

307

In [38]:
# And here's a sample of them.
# NOTE: despite its name, `neither_strict` is built with strict=False.

# Slice before formatting so only the ten displayed lines are rendered,
# not the whole list.
print("\n".join(la.txt(verse, scan=True) for verse in neither_strict[:10]))

Disiecitque rates euertitque aequora uentis,
1A1T2A2b    2c3A  3T4A4T_    5A5b5c  6A6X
Luctantis uentos tempestatesque sonoras
1A1T2A    2T3A   3T4A4T5A5b     5c6A6X
Incute uim uentis submersasque obrue  puppes,
1A1b1c 2A  2T3A   3T4A4T_      5A5b5c 6A6X
Nimborumque facis tempestatumque potentem."
1A1T2A2b    2c3A  3T4A4T5A5b     5c6A6X
Conspexere, silent arrectisque auribus adstant;
1A1T2A2b    2c3A   3T4A4T_     5A5b5c  6A6X
Siue extrema pati nec exaudire uocatos.
1A_  1T2A2b  2c3A 3T  4A4T5A5b 5c6A6X
Vultu, quo caelum tempestatesque serenat,
1A1T   2A  2T3A   3T4A4T5A5b     5c6A6X
Sed fines Libyci, genus intractabile bello.
1A  1T2A  2b2c3A  3b3c  4A4T5A5b5c   6A6X
Passa Venus medio  sic interfata dolore est:
1A1b  1c2A  2b2c3A 3T  4A4T5A5b  5c6A6X _
Aut capere aut captas iam despectare uidentur:
1A  1b1c_  2A  2T3A   3T  4A4T5A5b   5c6A6X


In [42]:
# Note, though, that the gap between C's 1819 BD lines and my '-'
# caesura count is almost exactly his 244 missing verses: the two
# totals below differ by one. Not sure what's going on here.

print(1819 + 244)
print(sum(1 for verse in aen if la.classify_caesura(verse, 4, strict=False) == '-'))

2063


2064