# Mosaics etc.

In this notebook I run through the calculations for some of the claims about mosaic rhymes, and explore some of the features for rhyme searching and rhyme display.

In [1]:
from bs4 import BeautifulSoup

import importlib

from mqdq import rhyme, utils, rhyme_classes, babble
from mqdq import line_analyzer as la

import random
import operator
import bisect
import string
import copy
import re
import scipy as sp
import pandas as pd
import glob
import umap
import os
import subprocess

In [2]:
# Every Babbler built in this cell is also registered here, in load order.
allbabs = []


def _whole(title, *paths):
    '''Build one Babbler covering all of `paths`, register it in allbabs, return it.'''
    bab = babble.Babbler.from_file(*paths, name=title)
    allbabs.append(bab)
    return bab


def _by_book(path, title):
    '''
    Register the Babblers that babble.bookbabs builds from `path`, then the
    whole-work Babbler for the same file. Returns (book_babs, whole_bab).
    '''
    books = babble.bookbabs(path, title)
    allbabs.extend(books)
    return books, _whole(title, path)


def _by_file(pattern, title):
    '''
    Register the Babblers that babble.multibabs builds from the sorted files
    matching `pattern`, then one Babbler spanning all of those files.
    Returns (part_babs, whole_bab).
    '''
    parts = babble.multibabs(sorted(glob.glob(pattern)), title)
    allbabs.extend(parts)
    return parts, _whole(title, *sorted(glob.glob(pattern)))


tris_bab = _whole('Tristia', *sorted(glob.glob('mqdq/OV-tri*.xml')))

tib_babs, tib_single_bab = _by_file('mqdq/TIB-ele*.xml', "Tibullus")

# Index 63 is the 64th 'division' element of the Catullus file.
cat_soup, cat_ll = utils.slurp('mqdq/CATVLL-carm.xml')
cat64_bab = babble.Babbler(
    utils.clean(cat_soup('division')[63]('line')),
    name="Cat64",
)
allbabs.append(cat64_bab)

aen_babs, aen_single_bab = _by_book('mqdq/VERG-aene.xml', 'Aeneid')
geo_babs, geo_single_bab = _by_book('mqdq/VERG-geor.xml', 'Georgics')
sat_babs, sat_single_bab = _by_book('mqdq/IVV-satu.xml', 'Juv. Sat.')
met_babs, met_single_bab = _by_book('mqdq/OV-meta.xml', 'Metamorphoses')
puni_babs, puni_single_bab = _by_book('mqdq/SIL-puni.xml', 'Punica')
theb_babs, theb_single_bab = _by_book('mqdq/STAT-theb.xml', 'Thebaid')
phars_babs, phars_single_bab = _by_book('mqdq/LVCAN-phar.xml', 'Pharsalia')

prop_babs, prop_single_bab = _by_file('mqdq/PROP-ele*.xml', "Propertius")

ep_bab = _whole("Heroides", 'mqdq/OV-epis.xml')
aram_bab = _whole("Ars", 'mqdq/OV-aram.xml')
fast_bab = _whole("Fasti", 'mqdq/OV-fast.xml')

arg_babs, arg_single_bab = _by_book('mqdq/VAL_FL-argo.xml', 'Argonautica')
rena_babs, rena_single_bab = _by_book('mqdq/LVCR-rena.xml', 'DRN')

apot_bab = _whole('Apotheosis', 'mqdq/PRVD-apot.xml')
hamart_bab = _whole('Hamartigenia', 'mqdq/PRVD-hama.xml')
psych_bab = _whole('Psychomachia', 'mqdq/PRVD-psyc.xml')

horsat_babs, horsat_single_bab = _by_file('mqdq/HOR-sat*.xml', "Hor. Sat.")

## Custom Mosaic search function

Note that this does not include a score for line-initial rhymes! Those statistics were introduced after the mosaic analysis had been performed. This is something that should probably be added in future (it is a fairly minor change to the code), but the aim of this notebook is to cover the work as it was done to support the preprint.

In [5]:
# Scoring configuration passed to LineSet.score() by locate_mosaics.
# Every count/score weight is 1.0 (presumably the unweighted baseline; the
# exact meaning of each key is defined by the scoring code, not shown here).
NEUTRAL = {
    "ult_count": 1.0,
    "ult_score": 1.0,
    "penult_count": 1.0,
    "penult_score": 1.0,
    "ante_count": 1.0,
    "ante_score": 1.0,
    "mid_count": 1.0,
    "mid_score": 1.0,
    "score_bias": 0.5,
    "score_exponent": 2.5,
}

def locate_mosaics(bab, thresh=5.5):
    
    '''
    Find all the four-line mosaics with a score at or above a certain
    threshold in a supplied Babbler (representing one text).

    Every window of four consecutive lines is linked, coloured and scored
    with the NEUTRAL configuration.

    Parameters:
        bab: a Babbler; its _syl_source() supplies the syllabified LineSet.
        thresh: minimum score (inclusive) for a window to be kept.

    Returns:
        A list of (LineSet, score) tuples sorted by score, highest first.
        Texts shorter than four lines give an empty list.
    '''
    
    # sll is already syllabified (creates a preprocessed LineSet)
    sll = bab._syl_source()

    res = []
    # A text of n lines has n-3 four-line windows, the last starting at n-4.
    # (The earlier bound of len(sll)-4 skipped that final window, so the last
    # line of every text was never scored.)
    for idx in range(len(sll) - 3):
        ll = copy.copy(sll[idx:idx+4])
        # These methods link (assign rhymes) and color the words in the LineSet
        ll.colorlink()
        s = ll.score(config=NEUTRAL)
        if s >= thresh:
            res.append((ll,s))
    # sort by score, descending
    res.sort(key=lambda x: x[1], reverse=True)
    return res

In [10]:
# This is the general approach, which yields "Mosaics per thousand lines":
# count the windows scoring at or above the threshold, divide by
# len(raw_source) (presumably the number of lines in the work) and scale by 1000.

met_res = locate_mosaics(met_single_bab, thresh=5.5)
len(met_res)/len(met_single_bab.raw_source)*1000

34.14106087101341

In [9]:
# The results from the Met reflect Ovid's love of repeats with minor variation

# met_res is sorted by score, highest first, so this displays the ten
# top-scoring windows, each followed by a separator line giving its score.
for q, s in met_res[:10]:
    utils.nbshow(q, book=True)
    print('\n' + '-'*15 + "^ Score: %.2f ^" % s + '-'*15 + '\n')


---------------^ Score: 9.30 ^---------------




---------------^ Score: 8.63 ^---------------




---------------^ Score: 8.42 ^---------------




---------------^ Score: 8.41 ^---------------




---------------^ Score: 8.33 ^---------------




---------------^ Score: 8.33 ^---------------




---------------^ Score: 8.20 ^---------------




---------------^ Score: 8.17 ^---------------




---------------^ Score: 8.16 ^---------------




---------------^ Score: 8.14 ^---------------



## Another custom search function - Golden Lions

This is really just an example of the flexibility of the scan/search pattern.

In [11]:
# Translation table that deletes the backtick character from a syllable
# (per its name, the backtick is the stress marker).
STRIP_STRESS = str.maketrans({'`':None})
def shiny_leo(ll):
    
    '''
    Implement the search function pattern required by the Babbler class.
    
    In this case, search for 'golden lions' which is a pattern I made up,
    consisting of a leonine rhyme which also encloses an alliteration.

    Concretely, a line matches when:
      * the "mid" word and the final word rhyme with a score of at least 1.75,
      * the two words before the final word start with the same letter
        (stress marks stripped, case ignored), and
      * the word fourth from the end is the "mid" word.

    Parameters:
        ll: a sequence holding exactly one line.

    Returns:
        A copy of the line, with the two rhyming words given a shared, locked
        colour and their best_match / best_word updated, or None when the
        line does not match (including lines too short to hold the pattern).

    Raises:
        ValueError: if ll does not contain exactly one line.
    '''
    
    if len(ll) != 1:
        raise ValueError("Need %s line." % 1)

    l = copy.copy(ll[0])
    w1 = l.fetch("mid")
    w2 = l.fetch(-1)
    s = rhyme.word_rhyme(w1, w2)
    if not s >= 1.75:
        return None
    # A line with fewer than four words, or an enclosed word with no
    # syllables, cannot hold the pattern: treat it as "no match" rather than
    # falling through with f1/f2 undefined.
    try:
        f1 = l[-2].syls[0].translate(STRIP_STRESS).lower()[0]
        f2 = l[-3].syls[0].translate(STRIP_STRESS).lower()[0]
        encloses = l[-4] == w1
    except (IndexError, AttributeError):
        return None
    if not (f1 == f2 and encloses):
        return None

    # Give both rhyming words the same colour and lock it.
    w1.color = w2.get_color()
    w2.color = w1.color
    w1.lock_color, w2.lock_color = True, True
    # Record the rhyme on each word only if it beats that word's current best.
    if s > w1.best_match:
        w1.best_match = s
        w1.best_word = w2
    if s > w2.best_match:
        w2.best_match = s
        w2.best_word = w1

    return l
# Attributes set for the Babbler search-function protocol (see the docstring).
shiny_leo.length = 1
shiny_leo.name = "shiny leo"
shiny_leo.baseline = None

In [12]:
# Scan the whole-Aeneid Babbler with the shiny_leo search function.
# gather=True presumably asks scan to keep the matching lines as well as
# counting them (confirm against Babbler.scan).
aen_lions = aen_single_bab.scan(shiny_leo, gather=True)

In [20]:
# the scan function returns a triple, true_count, false_count, gathered_results,
# so index 2 below selects the gathered results
# nbshow is a utility method to colour and display a LineSet in Jupyter

utils.nbshow(aen_lions[2], book=True)

## (Extra) Mosaics per Thousand - All Authors

One or two stats for mosaics per thousand appear in the paper, for a few authors, but there is no full analysis (there wasn't space) so I may as well produce it here. Mosaic rhymes are discussed in Sec. 4.1.2.

In [21]:
# Keep only the Babblers whose name contains no digit (presumably the
# per-book Babblers carry a book number in their name; confirm against babble).
full_works = [bab for bab in allbabs if re.search('[0-9]', bab.name) is None]

In [38]:
# For every whole-work Babbler, count the mosaics at the standard threshold
# and normalise by len(raw_source) to get a per-thousand rate.
mpt = []
for w in full_works:
    r = locate_mosaics(w, thresh=5.5)
    x = len(r)/len(w.raw_source)*1000
    mpt.append({'work':w.name, 'mpt':x})
# One row per work, with columns 'work' and 'mpt'.
mpt_df = pd.DataFrame(mpt)

The results show the striking amount of additional rhyming ornamentation in the elegiac works. To a great extent this is supported by the increased propensity to leonine rhyme (if there are many leonine rhymes, it doesn't take many more to reach the threshold for a mosaic). Manually inspecting the results, however, also shows that line-initial rhymes come up quite often, even though, as discussed above, they don't contribute to the mosaic score in this version of the code. Line-initial rhyme is another feature which is more common in elegy than in epic.

Note that, as I mention in the paper, even the modest number in the Aeneid (22 per thousand) is too great to have occurred by chance. It appears that this kind of ornamentation was used fairly sparingly (although Propertius' use could hardly be called sparing), but deliberately. 

In [39]:
# Rank the works from the highest to the lowest mosaics-per-thousand rate.
mpt_df.sort_values(by='mpt', ascending=False)

Unnamed: 0,work,mpt
9,Propertius,85.058618
1,Tibullus,79.774376
12,Fasti,67.578439
10,Heroides,63.006073
0,Tristia,61.155153
11,Ars,54.506438
15,Apotheosis,37.822878
16,Hamartigenia,37.267081
13,Argonautica,35.245459
8,Pharsalia,34.615385


In [40]:
# Run the mosaic search on the Fasti Babbler and keep the scored windows for display.
fasti_res = locate_mosaics(fast_bab, thresh=5.5)

In [41]:
# Display the seven top-scoring Fasti windows (fasti_res is sorted by score,
# highest first), each followed by a separator line giving its score.
for q, s in fasti_res[:7]:
    utils.nbshow(q, book=True)
    print('\n' + '-'*15 + "^ Score: %.2f ^" % s + '-'*15 + '\n')


---------------^ Score: 11.36 ^---------------




---------------^ Score: 10.03 ^---------------




---------------^ Score: 9.72 ^---------------




---------------^ Score: 9.58 ^---------------




---------------^ Score: 9.42 ^---------------




---------------^ Score: 9.07 ^---------------




---------------^ Score: 8.92 ^---------------

