In [1]:
import glob
from collections import defaultdict

import numpy as np
import pandas as pd

from mqdq import babble, ngrams, elegy
from mqdq import line_analyzer as la

In [3]:
# Load every non-elegy work as one Babbler each. The individual
# ``*_single_bab`` names are kept at module level in case later cells use them.
aen_single_bab = babble.Babbler.from_file(
    "corpus/VERG-aene.xml", name="Aeneid", author="Vergil"
)
geo_single_bab = babble.Babbler.from_file(
    "corpus/VERG-geor.xml", name="Georgics", author="Vergil"
)
sat_single_bab = babble.Babbler.from_file(
    "corpus/IVV-satu.xml", name="Juv. Sat.", author="Juvenal"
)
puni_single_bab = babble.Babbler.from_file(
    "corpus/SIL-puni.xml", name="Punica", author="Silius"
)
theb_single_bab = babble.Babbler.from_file(
    "corpus/STAT-theb.xml", name="Thebaid", author="Statius"
)
phars_single_bab = babble.Babbler.from_file(
    "corpus/LVCAN-phar.xml", name="Pharsalia", author="Lucan"
)
arg_single_bab = babble.Babbler.from_file(
    "corpus/VAL_FL-argo.xml", name="Argonautica", author="V.Flaccus"
)
rena_single_bab = babble.Babbler.from_file(
    "corpus/LVCR-rena.xml", name="DRN", author="Lucretius"
)

# The Horace pattern may match several files; sorting gives a stable order.
# Fail loudly if nothing matched (e.g. wrong working directory) instead of
# handing ``from_file`` an empty file list.
horsat_files = sorted(glob.glob("corpus/HOR-sat*.xml"))
assert horsat_files, "no files matched corpus/HOR-sat*.xml"
horsat_single_bab = babble.Babbler.from_file(
    *horsat_files, name="Hor. Sat.", author="Horace"
)

# Built in one statement so re-running the cell can never duplicate entries.
non_elegy = [
    aen_single_bab,
    geo_single_bab,
    sat_single_bab,
    puni_single_bab,
    theb_single_bab,
    phars_single_bab,
    arg_single_bab,
    rena_single_bab,
    horsat_single_bab,
]

In [4]:
def subsample(
    ary: list[babble.Babbler], mu: float, sd: float, n: int, min_length: int = 0
) -> list[babble.Babbler]:
    """Draw ``n`` random contiguous excerpts, cycling through the works in ``ary``.

    Sample ``i`` is taken from ``ary[i % len(ary)]``. Its length is drawn from
    a normal distribution (``mu``, ``sd``), truncated to an integer; draws that
    are not greater than ``min_length`` (or not positive) are discarded. The
    start offset is uniform over every position at which the excerpt fits.

    All randomness comes from NumPy's global generator (``np.random``), so call
    ``np.random.seed`` beforehand for reproducible samples.

    Args:
        ary: Works to sample from; each must expose ``raw_source``, ``name``
            and ``author``.
        mu: Mean excerpt length.
        sd: Standard deviation of the excerpt length.
        n: Number of excerpts to produce.
        min_length: Exclusive lower bound on the drawn lengths.

    Returns:
        ``n`` new Babblers named ``"<i>-<work name>"`` carrying the source
        work's author. An excerpt is clamped to the whole work when the drawn
        length exceeds the work's size, so it can then be shorter than
        ``min_length``.

    Raises:
        ValueError: if ``ary`` is empty while ``n > 0``, or if no usable
            lengths can be drawn for the given ``mu``/``sd``/``min_length``.
    """
    if n > 0 and not ary:
        raise ValueError("cannot subsample from an empty list of works")

    # A slice of zero or negative length is meaningless, so never go below 0.
    floor = max(min_length, 0)

    def _draw_lengths() -> list:
        # Over-draw (2n) so that filtering usually still leaves n lengths.
        drawn = np.random.normal(mu, sd, n * 2).astype("int")
        return [x for x in drawn if x > floor]

    lengths = _draw_lengths()
    # Filtering can leave fewer than n lengths; top up rather than hitting an
    # IndexError below. Bounded so impossible parameters cannot loop forever.
    max_redraws = 100
    redraws = 0
    while len(lengths) < n:
        if redraws >= max_redraws:
            raise ValueError(
                f"could not draw {n} lengths greater than {floor} "
                f"from N({mu}, {sd})"
            )
        lengths.extend(_draw_lengths())
        redraws += 1

    samps: list[babble.Babbler] = []
    for i in range(n):
        work = ary[i % len(ary)]
        # Bound by the sequence that is actually sliced.
        available = len(work.raw_source)
        # Clamp so a short work yields itself instead of raising.
        length = min(lengths[i], available)
        # randint's upper bound is exclusive; +1 makes the final valid start
        # (the excerpt ending on the last element) reachable.
        start = np.random.randint(available - length + 1)
        samps.append(
            babble.Babbler(
                work.raw_source[start : start + length],
                name=f"{i}-{work.name}",
                author=work.author,
            )
        )
    return samps

In [5]:
# Sanity check: the author attached to each loaded work, in list order.
[work.author for work in non_elegy]

['Vergil',
 'Vergil',
 'Juvenal',
 'Silius',
 'Statius',
 'Lucan',
 'V.Flaccus',
 'Lucretius',
 'Horace']

In [6]:
# subsample() draws from NumPy's global RNG; seed it so the excerpts (and the
# statistics computed from them below) are reproducible on Restart & Run All.
np.random.seed(42)
non_elegy_samples = subsample(non_elegy, mu=100, sd=10, n=900)

In [12]:
# Per-work list of elision rates: total elision count of a sample divided by
# the number of entries in its raw_source, keyed by "<author>-<work name>".
hexameter = defaultdict(list)

for b in non_elegy_samples:
    # Sample names look like "<index>-<work name>". Split on the first hyphen
    # only, so a work name that itself contains "-" is not truncated.
    work_name = b.name.split("-", 1)[1]
    elisions = sum(la.elision_count(line) for line in b.raw_source)
    hexameter[f"{b.author}-{work_name}"].append(elisions / len(b.raw_source))

In [15]:
# One summary row per work: mean and (population) standard deviation of the
# per-sample elision rates collected in ``hexameter``.
for k in hexameter:
    v = hexameter[k]
    print(f"{k:<25}: Mean: {np.mean(v):.2f} Std: {np.std(v):.2f}")

Vergil-Aeneid            : Mean: 0.54 Std: 0.08
Vergil-Georgics          : Mean: 0.50 Std: 0.06
Juvenal-Juv. Sat.        : Mean: 0.35 Std: 0.09
Silius-Punica            : Mean: 0.44 Std: 0.07
Statius-Thebaid          : Mean: 0.38 Std: 0.08
Lucan-Pharsalia          : Mean: 0.13 Std: 0.04
V.Flaccus-Argonautica    : Mean: 0.27 Std: 0.07
Lucretius-DRN            : Mean: 0.45 Std: 0.10
Horace-Hor. Sat.         : Mean: 0.41 Std: 0.12


In [None]:
# Read the elegy table from CSV, using its first column as the row index.
short_elegy = pd.read_csv(
    'elegy_poetic.csv',
    index_col=0,
)

In [None]:
# Mean and standard deviation of the ELC column for each (Author, Work) pair.
elc_by_work = short_elegy.groupby(['Author', 'Work'])['ELC']
elc_by_work.agg(['mean', 'std'])

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,std
Author,Work,Unnamed: 2_level_1,Unnamed: 3_level_1
Catullus,Cat.,0.491109,0.312907
Ovid,Am.,0.092543,0.056981
Ovid,Ep.,0.091924,0.030277
Ovid,Pont.,0.07797,0.042059
Ovid,Tr.,0.085079,0.044752
Propertius,Prop.,0.235744,0.10646
Tibullus,Tib.,0.10801,0.047177
ps-Ovid,Consolatio,0.242616,0.03815
ps-Ovid,Ibis,0.083271,0.044208
ps-Ovid,Medicamina,0.08,
