In [1]:
# Imports

import html
import os
import re
import string
from collections import Counter
from collections import defaultdict
from pprint import pprint

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from cltk.corpus.latin import latinlibrary
from cltk.stem.latin.j_v import JVReplacer
from cltk.tokenize.word import WordTokenizer

In [2]:
# Set up CLTK tools

# Word tokenizer constructed for 'latin'.
# NOTE(review): not referenced by any other cell visible in this notebook — confirm it is still needed.
word_tokenizer = WordTokenizer('latin')
# preprocess() calls replacer.replace() on the lowercased text (u/v and i/j normalization).
replacer = JVReplacer()

## Get text of the Georgics

In [3]:
# Get the Georgics texts: every Latin Library file id containing 'vergil/geo'
# NOTE(review): later cells label the books '1'-'4' purely by list position, so this
# assumes fileids() returns the four books in order — confirm.

files = latinlibrary.fileids()
georgics_files = [file for file in files if 'vergil/geo' in file]
georgics_raw = [latinlibrary.raw(file) for file in georgics_files]

In [4]:
# Preprocess texts

def preprocess(text):
    """Clean one raw Latin Library text so only lowercase words and line breaks remain.

    Steps, in order: strip site/title boilerplate, decode HTML entities,
    lowercase, apply ``replacer.replace`` (u/v and i/j normalization), turn
    punctuation and digits into spaces, collapse runs of spaces, and reduce
    every run of line breaks (with the whitespace around it) to one newline.

    Args:
        text: Raw text of one file, as a string.

    Returns:
        The cleaned text. Line breaks are single newline characters with no
        whitespace on either side; a leading newline or a trailing space at
        the very end of the text may remain.
    """
    remove_list = [r'\bVergil\b',
                   r'\bThe Latin Library\b',
                   r'\bThe Classics Page',
                   r'\bP. VERGILI MARONIS GEORGICON LIBER .+?\b'
                  ]

    for pattern in remove_list:
        text = re.sub(pattern, '', text)

    # REMOVE LETTER HEADINGS?

    text = html.unescape(text) # Handle html entities
    text = re.sub(r'&nbsp;?', ' ', text) #&nbsp; stripped incorrectly in corpus?
    text = re.sub(r'\x00', ' ', text) #Another space problem?

    # Remove roman numeral headings; must be before lower & replacer
    #text = re.sub(r'\b(M{1,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})|M{0,4}(CM|C?D|D?C{1,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,4})|M{0,4}(CM|CD|D?C{0,3})(XC|X?L|L?X{1,3})(IX|IV|V?I{0,3})|M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|I?V|V?I{1,3}))\b[\.]',' ',text)

    text = text.lower()
    text = replacer.replace(text) #Normalize u/v & i/j

    # Herdan also normalizes 'qu' to 'c'
    #text = re.sub('qu', 'k', text)
    #text = re.sub('cui', 'ku', text)

    # Same character set as before; the backslash is now written as '\\' because
    # '\]' is an invalid escape sequence (SyntaxWarning on Python 3.12+).
    punctuation = '"#$%&\'()*+,-/:;<=>@[\\]^_`{|}~.?!«»'
    # One translation pass blanks out both punctuation and digits.
    translator = str.maketrans({key: " " for key in punctuation + '0123456789'})
    text = text.translate(translator)

    text = re.sub(r'[ ]+', ' ', text) # Remove double spaces
    # Remove double lines and trim spaces around new lines. The previous pattern
    # required whitespace on BOTH sides of the newline, so an exact blank line
    # or a plain trailing space before a newline slipped through.
    text = re.sub(r'[^\S\n]*\n\s*', '\n', text)

    return text

In [5]:
# Run every raw Georgics text through preprocess(), then show the first one

georgics_edit = list(map(preprocess, georgics_raw))
print(georgics_edit[0])


quid faciat laetas segetes quo sidere terram
uertere maecenas ulmisque adiungere uitis
conueniat quae cura boum qui cultus habendo
sit pecori apibus quanta experientia parcis
hinc canere incipiam uos o clarissima mundi
lumina labentem caelo quae ducitis annum
liber et alma ceres uestro si munere tellus
chaoniam pingui glandem mutauit arista
poculaque inuentis acheloia miscuit uuis
et uos agrestum praesentia numina fauni
ferte simul faunique pedem dryadesque puellae
munera uestra cano tuque o cui prima frementem
fudit equum magno tellus percussa tridenti
neptune et cultor nemorum cui pinguia ceae
ter centum niuei tondent dumeta iuuenci
ipse nemus linquens patrium saltusque lycaei
pan ouium custos tua si tibi maenala curae
adsis o tegeaee fauens oleaeque minerua
inuentrix uncique puer monstrator aratri
et teneram ab radice ferens siluane cupressum
dique deaeque omnes studium quibus arua tueri
quique nouas alitis non ullo semine fruges
quique satis largum caelo demittitis imbrem
tuque ad

In [6]:
# Get a list of initial letters

def get_lines(text):
    """Split ``text`` on newlines and return the stripped, non-empty lines.

    Lines are stripped *before* the emptiness test, so a line holding only
    whitespace is dropped instead of surviving as an empty string (which
    would make ``line[0]`` fail downstream).

    Args:
        text: String containing newline-separated lines.

    Returns:
        List of stripped lines, in order, with blank lines removed.
    """
    stripped_lines = (line.strip() for line in text.split('\n'))
    return [line for line in stripped_lines if line]

def get_initials(lines):
    """Return the first character of every line, in the same order as ``lines``."""
    initials = []
    for line in lines:
        initials.append(line[0])
    return initials


In [7]:
# One list of lines per book, then one list of line-initial letters per book
georgics_lines = list(map(get_lines, georgics_edit))
georgics_initials = list(map(get_initials, georgics_lines))

In [8]:
# Function for combining list elements into various length strings

def find_ngrams(input_list, n):
    """Join every run of ``n`` consecutive string items of ``input_list``.

    Returns a list with one concatenated string per window, in order; the
    list is empty when ``input_list`` has fewer than ``n`` items.
    """
    # zip() over the sequence shifted by 0..n-1 yields the sliding windows
    shifted = (input_list[offset:] for offset in range(n))
    return ["".join(window) for window in zip(*shifted)]


In [9]:
# For each book: every pair of consecutive line-initial letters, joined into a two-character string
georgics_bigrams = [find_ngrams(initial,2) for initial in georgics_initials]

In [10]:
def get_x_match(bigram):
    """Return True when the first two items of ``bigram`` are equal."""
    first = bigram[0]
    second = bigram[1]
    return first == second

In [11]:
# Per book, keep the repeated letter of every bigram whose two initials are equal
georgics_matches = [
    [bigram[0] for bigram in bigrams if get_x_match(bigram)]
    for bigrams in georgics_bigrams
]

print(georgics_matches)

[['l', 'q', 'a', 'a', 'i', 'i', 'c', 'a', 'u', 's', 's', 'e', 'e', 'c', 'a', 't', 'c', 'e', 'i', 'm', 'n', 'i', 'a', 't', 'c', 'a', 'c', 'a', 'a', 'n', 'n', 'i', 's', 'a', 'a', 'i'], ['p', 's', 'n', 'n', 'n', 'a', 'n', 'h', 'a', 'a', 'q', 'e', 'i', 'q', 'a', 'a', 'a', 'a', 'a', 'n', 'u', 's', 'a', 'p', 'e', 'a', 'p', 'i', 'n', 'n', 'n', 'a', 'i', 'i', 'h', 's'], ['i', 'c', 'c', 'i', 'i', 'e', 's', 't', 'c', 'c', 't', 'i', 'f', 'i', 's', 's', 'n', 'p', 'i', 's', 'a', 'c', 'u', 'e', 's', 'i', 'a', 'a', 'i', 'c', 'n', 'd', 'p', 'n', 'p', 'i', 'u'], ['m', 'f', 'o', 'a', 'u', 'u', 'p', 'e', 's', 't', 't', 'i', 'c', 'e', 'i', 'd', 'a', 'i', 'i', 's', 'a', 'i', 'e', 's', 'e', 's', 'a', 'a', 'e', 'e', 's', 'm', 'a', 'e', 'p', 'p', 'o', 't', 'q', 'c', 'o', 's', 'i', 'a', 't', 't', 'm', 'i', 'q', 'q', 'c']]


In [12]:
# Per book: how often each letter occurs among the matches, as a plain dict
georgics_match_counts = [dict(Counter(match)) for match in georgics_matches]
l = georgics_match_counts  # alias read by the following cell
print(georgics_match_counts)

[{'l': 1, 'q': 1, 'a': 10, 'i': 6, 'c': 5, 'u': 1, 's': 3, 'e': 3, 't': 2, 'm': 1, 'n': 3}, {'p': 3, 's': 3, 'n': 8, 'a': 11, 'h': 2, 'q': 2, 'e': 2, 'i': 4, 'u': 1}, {'i': 9, 'c': 6, 'e': 2, 's': 5, 't': 2, 'f': 1, 'n': 3, 'p': 3, 'a': 3, 'u': 2, 'd': 1}, {'m': 3, 'f': 1, 'o': 3, 'a': 7, 'u': 2, 'p': 3, 'e': 7, 's': 6, 't': 5, 'i': 7, 'c': 3, 'd': 1, 'q': 3}]


In [13]:
from collections import defaultdict

# Letters that are tabulated, in column order
alpha_keys = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u']

# letter -> one count per book (0 when the letter has no match in that book)
d = defaultdict(list)
for letter in alpha_keys:
    for book_counts in l:
        d[letter].append(book_counts.get(letter, 0))

print(d)
            

defaultdict(<class 'list'>, {'a': [10, 11, 3, 7], 'b': [0, 0, 0, 0], 'c': [5, 0, 6, 3], 'd': [0, 0, 1, 1], 'e': [3, 2, 2, 7], 'f': [0, 0, 1, 1], 'g': [0, 0, 0, 0], 'h': [0, 2, 0, 0], 'i': [6, 4, 9, 7], 'l': [1, 0, 0, 0], 'm': [1, 0, 0, 3], 'n': [3, 8, 3, 0], 'o': [0, 0, 0, 3], 'p': [0, 3, 3, 3], 'q': [1, 2, 0, 3], 'r': [0, 0, 0, 0], 's': [3, 3, 5, 6], 't': [2, 0, 2, 5], 'u': [1, 1, 2, 2]})


In [14]:

# Turn the per-letter counts in `d` into a table with one row per book,
# labelled by a (Gaps, Book) MultiIndex; 'XX' marks matches on adjacent lines.
tuples = [('XX', book) for book in ['1','2','3','4']]
index = pd.MultiIndex.from_tuples(tuples, names=['Gaps', 'Book'])
df = pd.DataFrame.from_dict(d)
df.index = index
df


Unnamed: 0_level_0,Unnamed: 1_level_0,a,b,c,d,e,f,g,h,i,l,m,n,o,p,q,r,s,t,u
Gaps,Book,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
XX,1,10,0,5,0,3,0,0,0,6,1,1,3,0,0,1,0,3,2,1
XX,2,11,0,0,0,2,0,0,2,4,0,0,8,0,3,2,0,3,0,1
XX,3,3,0,6,1,2,1,0,0,9,0,0,3,0,3,0,0,5,2,2
XX,4,7,0,3,1,7,1,0,0,7,0,3,0,3,3,3,0,6,5,2


In [15]:
# Preview: first 1000 characters of the first preprocessed book
print(georgics_edit[0][:1000])


quid faciat laetas segetes quo sidere terram
uertere maecenas ulmisque adiungere uitis
conueniat quae cura boum qui cultus habendo
sit pecori apibus quanta experientia parcis
hinc canere incipiam uos o clarissima mundi
lumina labentem caelo quae ducitis annum
liber et alma ceres uestro si munere tellus
chaoniam pingui glandem mutauit arista
poculaque inuentis acheloia miscuit uuis
et uos agrestum praesentia numina fauni
ferte simul faunique pedem dryadesque puellae
munera uestra cano tuque o cui prima frementem
fudit equum magno tellus percussa tridenti
neptune et cultor nemorum cui pinguia ceae
ter centum niuei tondent dumeta iuuenci
ipse nemus linquens patrium saltusque lycaei
pan ouium custos tua si tibi maenala curae
adsis o tegeaee fauens oleaeque minerua
inuentrix uncique puer monstrator aratri
et teneram ab radice ferens siluane cupressum
dique deaeque omnes studium quibus arua tueri
quique nouas alitis non ullo semine fruges
quique satis largum caelo demittitis imbrem
tuque ad

In [16]:
def find_skipgrams(input_list, n, step=0):
    """Pair each element with the element ``n + step - 1`` positions after it.

    With ``n=2`` this gives adjacent pairs for ``step=0``, pairs with one
    element in between for ``step=1``, and so on. Only the first and last
    shifted views are needed, so just one slice is taken instead of
    ``n + step`` copies of the input.

    Args:
        input_list: Sliceable sequence (list, tuple, string, ...).
        n: Base window size; only the sum ``n + step`` affects the result.
        step: Extra positions to skip on top of ``n``.

    Returns:
        List of 2-tuples ``(input_list[i], input_list[i + n + step - 1])``;
        empty when the input is shorter than ``n + step``.

    Raises:
        IndexError: If ``n + step`` is less than 1 (same exception type the
            previous implementation raised, now with an explicit message).
    """
    offset = n + step - 1
    if offset < 0:
        raise IndexError('find_skipgrams requires n + step >= 1')
    return list(zip(input_list, input_list[offset:]))

In [17]:
# With n=2, step=1 each initial is paired with the one two lines later (one line in between: 'X1X')
georgics_skipgrams = [find_skipgrams(initial,2,1) for initial in georgics_initials]

In [18]:
# Per book, keep the repeated letter of every skipgram whose two initials are equal
georgics_matches = [
    [bigram[0] for bigram in bigrams if get_x_match(bigram)]
    for bigrams in georgics_skipgrams
]

print(georgics_matches)

[['f', 't', 'a', 'q', 'n', 'i', 'a', 'p', 'a', 'a', 's', 's', 's', 'e', 'i', 'e', 'u', 'n', 'u', 't', 'i', 'e', 'a', 'q', 'm', 'a', 'e', 't', 's', 'a', 'a', 'e', 'q', 's', 's', 'e', 'a', 'e', 'n', 'n', 'n', 'e', 't', 'h', 's', 'i', 'u', 'i', 'a', 'u', 'a'], ['p', 'i', 'i', 'n', 's', 'n', 'q', 'a', 'e', 's', 'a', 'h', 'p', 'h', 's', 'e', 'a', 'e', 'a', 'q', 'a', 's', 'a', 'a', 'a', 'a', 'a', 'a', 's', 'c', 'c', 'f', 'i', 'e', 'o', 'e', 'e', 'a', 'p', 'c', 'a', 'n', 'a', 'f', 's', 'a', 's', 'i', 'i', 'a'], ['p', 'p', 'c', 'a', 'i', 'i', 't', 'a', 'a', 's', 'i', 'c', 'i', 'e', 's', 's', 's', 'h', 'e', 's', 'a', 's', 'a', 'u', 't', 'h', 'h', 'a', 'a', 'e', 'a', 't', 'p', 's', 't', 'n', 'i', 's', 's', 'o', 'i', 'h', 's', 's', 'e', 'f', 'n', 'i', 'p', 'p', 'n'], ['u', 's', 'u', 's', 'p', 't', 'e', 'i', 'n', 'i', 'u', 'e', 'e', 'n', 's', 'i', 'a', 'i', 's', 'h', 'p', 'e', 'q', 'i', 'q', 'c', 'c', 'c', 'a', 'i', 'e', 'i', 'c', 'i', 'i', 'a', 't', 'u', 'i', 'd', 'q', 'p']]


In [19]:
# Per book: how often each letter occurs among the matches, as a plain dict
georgics_match_counts = [dict(Counter(match)) for match in georgics_matches]
l = georgics_match_counts  # alias read by the following cell
print(georgics_match_counts)

[{'f': 1, 't': 4, 'a': 11, 'q': 3, 'n': 5, 'i': 5, 'p': 1, 's': 7, 'e': 8, 'u': 4, 'm': 1, 'h': 1}, {'p': 3, 'i': 5, 'n': 3, 's': 7, 'q': 2, 'a': 16, 'e': 6, 'h': 2, 'c': 3, 'f': 2, 'o': 1}, {'p': 5, 'c': 2, 'a': 8, 'i': 7, 't': 4, 's': 11, 'e': 4, 'h': 4, 'u': 1, 'n': 3, 'o': 1, 'f': 1}, {'u': 4, 's': 4, 'p': 3, 't': 2, 'e': 5, 'i': 10, 'n': 2, 'a': 3, 'h': 1, 'q': 3, 'c': 4, 'd': 1}]


In [20]:
from collections import defaultdict

# Letters that are tabulated, in column order
alpha_keys = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u']

# letter -> one count per book (0 when the letter has no match in that book)
d = defaultdict(list)
for letter in alpha_keys:
    for book_counts in l:
        d[letter].append(book_counts.get(letter, 0))

print(d)
            

defaultdict(<class 'list'>, {'a': [11, 16, 8, 3], 'b': [0, 0, 0, 0], 'c': [0, 3, 2, 4], 'd': [0, 0, 0, 1], 'e': [8, 6, 4, 5], 'f': [1, 2, 1, 0], 'g': [0, 0, 0, 0], 'h': [1, 2, 4, 1], 'i': [5, 5, 7, 10], 'l': [0, 0, 0, 0], 'm': [1, 0, 0, 0], 'n': [5, 3, 3, 2], 'o': [0, 1, 1, 0], 'p': [1, 3, 5, 3], 'q': [3, 2, 0, 3], 'r': [0, 0, 0, 0], 's': [7, 7, 11, 4], 't': [4, 0, 4, 2], 'u': [4, 0, 1, 4]})


In [21]:
# Table of the X1X counts in `d`: one row per book under a (Gaps, Book) MultiIndex
tuples = [('X1X', book) for book in ['1','2','3','4']]
index = pd.MultiIndex.from_tuples(tuples, names=['Gaps', 'Book'])
X1X = pd.DataFrame.from_dict(d)
X1X.index = index
X1X



Unnamed: 0_level_0,Unnamed: 1_level_0,a,b,c,d,e,f,g,h,i,l,m,n,o,p,q,r,s,t,u
Gaps,Book,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
X1X,1,11,0,0,0,8,1,0,1,5,0,1,5,0,1,3,0,7,4,4
X1X,2,16,0,3,0,6,2,0,2,5,0,0,3,1,3,2,0,7,0,0
X1X,3,8,0,2,0,4,1,0,4,7,0,0,3,1,5,0,0,11,4,1
X1X,4,3,0,4,1,5,0,0,1,10,0,0,2,0,3,3,0,4,2,4


In [22]:
# Stack the X1X rows under the XX rows. DataFrame.append was removed in pandas 2.0;
# pd.concat gives the same result.
df = pd.concat([df, X1X])

In [23]:
from collections import defaultdict

# With n=2, step=2 each initial is paired with the one three lines later (two lines in between: 'X2X')
georgics_skipgrams = [find_skipgrams(initial,2,2) for initial in georgics_initials]

# Per book, keep the repeated letter of every skipgram whose two initials are equal
georgics_matches = [
    [bigram[0] for bigram in bigrams if get_x_match(bigram)]
    for bigrams in georgics_skipgrams
]

#print(georgics_matches)

georgics_match_counts = [dict(Counter(match)) for match in georgics_matches]
l = georgics_match_counts

# Letters that are tabulated, in column order
alpha_keys = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u']

# letter -> one count per book (0 when the letter has no match in that book)
d = defaultdict(list)
for letter in alpha_keys:
    for book_counts in l:
        d[letter].append(book_counts.get(letter, 0))

tuples = [('X2X', book) for book in ['1','2','3','4']]
index = pd.MultiIndex.from_tuples(tuples, names=['Gaps', 'Book'])
X2X = pd.DataFrame.from_dict(d)
X2X.index = index
X2X

Unnamed: 0_level_0,Unnamed: 1_level_0,a,b,c,d,e,f,g,h,i,l,m,n,o,p,q,r,s,t,u
Gaps,Book,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
X2X,1,9,0,2,0,2,0,0,0,5,0,0,5,0,1,1,0,5,2,3
X2X,2,6,0,4,0,3,5,0,6,7,0,1,3,0,6,0,0,4,0,0
X2X,3,8,0,6,0,2,0,0,0,5,0,0,5,0,3,0,0,4,1,1
X2X,4,7,0,1,1,8,3,0,0,9,0,0,0,0,3,2,0,9,3,0


In [24]:
# Display df with the X2X rows stacked underneath (df itself is not reassigned).
# DataFrame.append was removed in pandas 2.0; pd.concat gives the same result.
pd.concat([df, X2X])

Unnamed: 0_level_0,Unnamed: 1_level_0,a,b,c,d,e,f,g,h,i,l,m,n,o,p,q,r,s,t,u
Gaps,Book,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
XX,1,10,0,5,0,3,0,0,0,6,1,1,3,0,0,1,0,3,2,1
XX,2,11,0,0,0,2,0,0,2,4,0,0,8,0,3,2,0,3,0,1
XX,3,3,0,6,1,2,1,0,0,9,0,0,3,0,3,0,0,5,2,2
XX,4,7,0,3,1,7,1,0,0,7,0,3,0,3,3,3,0,6,5,2
X1X,1,11,0,0,0,8,1,0,1,5,0,1,5,0,1,3,0,7,4,4
X1X,2,16,0,3,0,6,2,0,2,5,0,0,3,1,3,2,0,7,0,0
X1X,3,8,0,2,0,4,1,0,4,7,0,0,3,1,5,0,0,11,4,1
X1X,4,3,0,4,1,5,0,0,1,10,0,0,2,0,3,3,0,4,2,4
X2X,1,9,0,2,0,2,0,0,0,5,0,0,5,0,1,1,0,5,2,3
X2X,2,6,0,4,0,3,5,0,6,7,0,1,3,0,6,0,0,4,0,0


In [43]:
# Build the full gap-frequency table: for 0 to 8 intervening lines, count per book
# how often a line-initial letter is repeated at that distance.

# Letters that are tabulated, in column order (loop-invariant, so defined once)
alpha_keys = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u']

# Book labels '1', '2', ... follow the number of books loaded instead of a hard-coded 4
book_labels = [str(number) for number in range(1, len(georgics_initials) + 1)]

gap_frames = []

for g in range(0, 9):
    # find_skipgrams(initial, 2, g) pairs each initial with the one g + 1 lines later
    georgics_skipgrams = [find_skipgrams(initial, 2, g) for initial in georgics_initials]

    # Per book, keep the repeated letter of every pair whose two initials are equal
    georgics_matches = [
        [skipgram[0] for skipgram in skipgrams if get_x_match(skipgram)]
        for skipgrams in georgics_skipgrams
    ]
    georgics_match_counts = [dict(Counter(match)) for match in georgics_matches]

    # letter -> one count per book (0 when the letter has no match in that book)
    d = {
        letter: [book_counts.get(letter, 0) for book_counts in georgics_match_counts]
        for letter in alpha_keys
    }

    # Row labels: 'XX' for adjacent lines, 'X<g>X' for g intervening lines
    gap_name = "XX" if g == 0 else "X%dX" % g
    tuples = [(gap_name, book) for book in book_labels]

    index = pd.MultiIndex.from_tuples(tuples, names=['Gaps', 'Book'])
    df_sub = pd.DataFrame.from_dict(d)
    df_sub.index = index
    gap_frames.append(df_sub)

# DataFrame.append was removed in pandas 2.0, and growing a frame inside the loop
# re-copies it every iteration; concatenate all gap tables once instead.
df = pd.concat(gap_frames)


In [51]:
# Print the combined gap-frequency table under its caption
print('Table 13a (from Herdan 1962, p. 81)')
print('Gap frequencies in Vergil\'s Georgics\n')
pprint(df)

Table 13a (from Herdan 1962, p. 81)
Gap frequencies in Vergil's Georgics

            a  b  c  d  e  f  g  h   i  l  m  n  o  p  q  r   s  t  u
Gaps Book                                                            
XX   1     10  0  5  0  3  0  0  0   6  1  1  3  0  0  1  0   3  2  1
     2     11  0  0  0  2  0  0  2   4  0  0  8  0  3  2  0   3  0  1
     3      3  0  6  1  2  1  0  0   9  0  0  3  0  3  0  0   5  2  2
     4      7  0  3  1  7  1  0  0   7  0  3  0  3  3  3  0   6  5  2
X1X  1     11  0  0  0  8  1  0  1   5  0  1  5  0  1  3  0   7  4  4
     2     16  0  3  0  6  2  0  2   5  0  0  3  1  3  2  0   7  0  0
     3      8  0  2  0  4  1  0  4   7  0  0  3  1  5  0  0  11  4  1
     4      3  0  4  1  5  0  0  1  10  0  0  2  0  3  3  0   4  2  4
X2X  1      9  0  2  0  2  0  0  0   5  0  0  5  0  1  1  0   5  2  3
     2      6  0  4  0  3  5  0  6   7  0  1  3  0  6  0  0   4  0  0
     3      8  0  6  0  2  0  0  0   5  0  0  5  0  3  0  0   4  1  1
     4      7  0

## Table 14

In [59]:
# Line-initial letters per book (recomputed from georgics_lines, same expression as earlier)
georgics_initials = [get_initials(lines) for lines in georgics_lines]
# Per book: number of lines starting with each letter, keys in alphabetical order.
# NOTE(review): despite the name, these are raw counts, not relative frequencies.
georgics_rel_freqs = [dict(sorted(Counter(initials).items())) for initials in georgics_initials]
print(georgics_rel_freqs)

[{'a': 66, 'b': 2, 'c': 37, 'd': 21, 'e': 51, 'f': 21, 'g': 4, 'h': 18, 'i': 52, 'l': 11, 'm': 19, 'n': 38, 'o': 10, 'p': 26, 'q': 23, 'r': 5, 's': 48, 't': 31, 'u': 31}, {'a': 68, 'b': 5, 'c': 34, 'd': 16, 'e': 49, 'f': 27, 'g': 1, 'h': 30, 'i': 48, 'l': 7, 'm': 15, 'n': 52, 'o': 13, 'p': 38, 'q': 29, 'r': 10, 's': 52, 't': 27, 'u': 21}, {'a': 68, 'b': 6, 'c': 53, 'd': 21, 'e': 42, 'f': 16, 'g': 3, 'h': 22, 'i': 64, 'l': 14, 'm': 16, 'n': 42, 'o': 7, 'p': 52, 'q': 20, 'r': 6, 's': 58, 't': 33, 'u': 23}, {'a': 65, 'b': 1, 'c': 42, 'd': 22, 'e': 58, 'f': 20, 'g': 4, 'h': 24, 'i': 66, 'l': 6, 'm': 17, 'n': 28, 'o': 22, 'p': 40, 'q': 31, 'r': 6, 's': 47, 't': 35, 'u': 32}]


In [159]:
# One row per initial letter, one column per book
book_columns = ['I','II','III','IV']
rel_freqs = pd.DataFrame.from_dict(georgics_rel_freqs).T
rel_freqs.columns = book_columns

# Mean number of counted lines per book
lines_per_book = rel_freqs[book_columns].sum()
average_book_length = lines_per_book.mean()
# Per-letter totals across the four books, and their overall sum
total_book_length = rel_freqs[book_columns].sum(axis=1)
grand_total_book_length = total_book_length.sum()

# 'Av': mean count per letter; 'Prob': that mean as a percentage of the average book length
rel_freqs['Av'] = rel_freqs[book_columns].mean(axis=1)
rel_freqs['Prob'] = [round((average / average_book_length)*100, 2) for average in rel_freqs['Av']]
rel_freqs

Unnamed: 0,I,II,III,IV,Av,Prob
a,66,68,68,65,66.75,12.2
b,2,5,6,1,3.5,0.64
c,37,34,53,42,41.5,7.59
d,21,16,21,22,20.0,3.66
e,51,49,42,58,50.0,9.14
f,21,27,16,20,21.0,3.84
g,4,1,3,4,3.0,0.55
h,18,30,22,24,23.5,4.3
i,52,48,64,66,57.5,10.51
l,11,7,14,6,9.5,1.74


In [160]:
# Scratch arithmetic. NOTE(review): 17.8 and 31 are unexplained — TODO document what this ratio checks.
17.8 / 31

0.5741935483870968

In [161]:
# Scratch arithmetic. NOTE(review): 11.1 and 19 are unexplained — TODO document what this ratio checks.
11.1/19

0.5842105263157894

In [162]:
# Scratch arithmetic. NOTE(review): 8.9 and 16 are unexplained — TODO document what this ratio checks.
8.9/16

0.55625

In [163]:
# Scratch arithmetic. NOTE(review): 6.7 and 12 are unexplained — TODO document what this ratio checks.
6.7/12

0.5583333333333333

In [164]:
# Scratch arithmetic. NOTE(review): 7.2 and 13 are unexplained — TODO document what this ratio checks.
7.2/13

0.5538461538461539

In [165]:
# Scratch arithmetic. NOTE(review): 13.8 and 14 are unexplained — TODO document what this ratio checks.
13.8/14

0.9857142857142858

In [166]:
# Scratch arithmetic. NOTE(review): 21.5 and 22 are unexplained — TODO document what this ratio checks.
21.5/22

0.9772727272727273