In [1]:
from scipy import stats
import numpy as np
import pandas as pd

Test of `scipy` Chi Square Implementation
===

In [2]:
# Sanity check of scipy's chi-square contingency test on a hand-entered 2x5 table.
# `e` is the column-wise total of Epistulae books 1-9 and `o` is the Panegyricus
# row; both sets of counts appear in the Pliny tables later in this notebook.
e = np.asarray([1755, 952, 790, 178, 705])
o = np.asarray([490, 279, 250, 50, 193])
stats.chi2_contingency(np.vstack((e, o)))

(2.5348285580374093,
 0.6384102476401078,
 4,
 array([[1742.83941865,  955.65047855,  807.37327189,  177.00106345,
          697.13576746],
        [ 502.16058135,  275.34952145,  232.62672811,   50.99893655,
          200.86423254]]))

Totals of 5 Rhythm Categories for Cicero's Speeches
===

In [3]:
# Load the per-speech clausula counts and drop the 'Unnamed: 0' column
# (presumably a saved index -- confirm against the CSV).
cicero_speeches = pd.read_csv('../data/cicero_speeches_pca.csv').drop(columns=['Unnamed: 0'])

# Collapse the fine-grained clausula columns into five rhythm categories,
# one row per speech, and carry the title along as a plain array.
cicero_speeches_total = pd.DataFrame({
    'cretic-trochee': cicero_speeches[[
        'cretic-trochee (-u--x)',
        'cretic-trochee 1 res (uuu--x)',
        'cretic-trochee 1 res (-uuu-x)',
        'cretic-trochee 1 res (-u-uux)',
    ]].sum(axis=1),
    'double-molossus-cretic': cicero_speeches[[
        'double/molossus-cretic pure double-cretic (-u--ux)',
        'double/molossus-cretic pure molossus-cretic (----ux)',
        'double/molossus-cretic 1 res (uuu--ux)',
        'double/molossus-cretic 1 res (-uuu-ux)',
        'double/molossus-cretic 1 res (-u-uuux)',
        'double/molossus-cretic molossus not chor 1 res (uu---ux)',
        'double/molossus-cretic molossus not chor 1 res (--uu-ux)',
        'double/molossus-cretic molossus not chor 1 res (---uuux)',
        'double/molossus-cretic chor res (-uu--ux)',
        'double/molossus-cretic ep res (-u---ux)',
    ]].sum(axis=1),
    'double-trochee': cicero_speeches[[
        'double trochee (-u-x)',
        'double trochee 1 res (uuu-x)',
        'double trochee 1 res (-uuux)',
    ]].sum(axis=1),
    'hypodochmiac': cicero_speeches[[
        'hypodochmiac (-u-ux)',
        'hypodochmiac 1 res (uuu-ux)',
        'hypodochmiac 1 res (-uuuux)',
    ]].sum(axis=1),
    'other': cicero_speeches[[
        'spondaic (---x)',
        'heroic (-uu-x)',
        'first paeon (-uux)',
        'choriamb trochee (-uu--x)',
        'short sequence (uuuuux)',
        'misc_clausulae',
    ]].sum(axis=1),
    'title': cicero_speeches['title'].values,
})

In [4]:
cicero_speeches_total.head()

Unnamed: 0,cretic-trochee,double-molossus-cretic,double-trochee,hypodochmiac,other,title
0,215,146,236,45,102,de domo sua
1,136,78,116,13,59,de haruspicum responso
2,35,33,44,11,19,de lege agraria 1
3,175,117,137,39,96,de lege agraria 2
4,21,24,17,5,16,de lege agraria 3


Method for Generating Chi Square Results
===

In [5]:
def chi_square_corpus(df, p_decimals=5):
    """Run a one-vs-rest chi-square test of independence for every title.

    For each value in ``df['title']`` the rows carrying that title are set
    against a single row holding the column-wise sum of all remaining rows,
    and the stacked table is passed to ``scipy.stats.chi2_contingency``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``'title'`` column; every other column is treated as a
        count category and has to be numeric.
    p_decimals : int or None, default 5
        Number of decimals the p-values are rounded to. Pass ``None`` to keep
        the unrounded values (rounding turns very small p-values into 0.0).

    Returns
    -------
    pandas.DataFrame
        Columns ``'title'``, ``'chi'`` and ``'p'``, one row per row of ``df``
        in the original order.

    Raises
    ------
    ValueError
        Propagated from ``chi2_contingency`` when the table has a zero
        expected frequency, e.g. when ``df`` holds a single title so that the
        "rest" row sums to zero.
    """
    titles = df['title'].values
    # Strip the label column once instead of twice per iteration.
    counts = df.drop(columns=['title'])
    chis = []
    ps = []
    for t in titles:
        is_target = (df['title'] == t).values
        speech = counts.loc[is_target]
        # Everything that is not the current title, pooled into one row.
        rest_of_speeches = counts.loc[~is_target].sum(axis=0).to_frame().T
        # DataFrame.append was removed in pandas 2.0; pd.concat stacks the
        # same rows and works on both old and new pandas.
        test_df = pd.concat([speech, rest_of_speeches])
        chi, p, _dof, _expected = stats.chi2_contingency(test_df)
        chis.append(chi)
        ps.append(p)
    if p_decimals is not None:
        ps = [round(p, p_decimals) for p in ps]
    return pd.DataFrame({'title': titles, 'chi': chis, 'p': ps})

Chi Square Table of 5 Rhythm Categories for Cicero's Speeches
===

In [6]:
cicero_totaled_chi_df = chi_square_corpus(cicero_speeches_total)

In [7]:
cicero_totaled_chi_df

Unnamed: 0,title,chi,p
0,de domo sua,23.330701,0.00011
1,de haruspicum responso,14.462578,0.00596
2,de lege agraria 1,5.25535,0.26209
3,de lege agraria 2,8.543234,0.07359
4,de lege agraria 3,2.355887,0.67061
5,de provinciis consularibus,19.136572,0.00074
6,in caecilium,4.302021,0.36667
7,in catilinam 1,12.122659,0.01646
8,in catilinam 2,10.68804,0.0303
9,in catilinam 3,11.604888,0.02054


Chi Square of Cicero Speeches for all Categories
===

In [8]:
cicero_speeches.head()

Unnamed: 0,title,cretic-trochee (-u--x),cretic-trochee 1 res (uuu--x),cretic-trochee 1 res (-uuu-x),cretic-trochee 1 res (-u-uux),double/molossus-cretic pure double-cretic (-u--ux),double/molossus-cretic pure molossus-cretic (----ux),double/molossus-cretic 1 res (uuu--ux),double/molossus-cretic 1 res (-uuu-ux),double/molossus-cretic 1 res (-u-uuux),...,hypodochmiac (-u-ux),hypodochmiac 1 res (uuu-ux),hypodochmiac 1 res (-uuuux),spondaic (---x),heroic (-uu-x),first paeon (-uux),choriamb trochee (-uu--x),short sequence (uuuuux),misc_clausulae,author
0,de domo sua,137,16,51,11,61,35,11,4,1,...,42,2,1,52,13,14,5,1,17,cicero
1,de haruspicum responso,82,8,37,9,26,26,4,2,1,...,12,0,1,41,11,2,2,1,2,cicero
2,de lege agraria 1,23,2,6,4,12,13,2,0,0,...,10,1,0,13,0,1,0,0,5,cicero
3,de lege agraria 2,131,8,26,10,35,38,4,3,2,...,38,1,0,49,15,16,5,0,11,cicero
4,de lege agraria 3,13,2,3,3,11,9,1,0,0,...,4,1,0,9,1,1,0,0,5,cicero


In [9]:
chi_square_corpus(cicero_speeches.drop(columns=['author']))

Unnamed: 0,title,chi,p
0,de domo sua,40.957185,0.0232
1,de haruspicum responso,42.039523,0.0178
2,de lege agraria 1,27.379938,0.33718
3,de lege agraria 2,30.631379,0.20152
4,de lege agraria 3,28.830866,0.27105
5,de provinciis consularibus,48.669721,0.0031
6,in caecilium,18.159192,0.83556
7,in catilinam 1,41.21325,0.0218
8,in catilinam 2,30.848473,0.19409
9,in catilinam 3,27.586805,0.32722


Rhythmic/Unrhythmic Table of Cicero's Speeches
===

In [10]:
# Two-way split per speech: the four named rhythm categories pooled as
# 'rhythmic' against the 'other' bucket as 'unrhythmic'.
rhythmic_unrhythmic_cicero_speeches_df = pd.DataFrame({
    'rhythmic': cicero_speeches_total[
        ['cretic-trochee', 'double-molossus-cretic', 'double-trochee', 'hypodochmiac']
    ].sum(axis=1),
    'unrhythmic': cicero_speeches_total['other'].values,
    'title': cicero_speeches_total['title'].values,
})
rhythmic_unrhythmic_cicero_speeches_df.head()

Unnamed: 0,rhythmic,unrhythmic,title
0,642,102,de domo sua
1,343,59,de haruspicum responso
2,123,19,de lege agraria 1
3,468,96,de lege agraria 2
4,67,16,de lege agraria 3


In [11]:
rhythmic_unrhythmic_cicero_speeches_chi = chi_square_corpus(rhythmic_unrhythmic_cicero_speeches_df)
rhythmic_unrhythmic_cicero_speeches_chi

Unnamed: 0,title,chi,p
0,de domo sua,6.916403,0.00854
1,de haruspicum responso,1.886225,0.16963
2,de lege agraria 1,1.320256,0.25055
3,de lege agraria 2,0.028352,0.86628
4,de lege agraria 3,0.097904,0.75436
5,de provinciis consularibus,7.665223,0.00563
6,in caecilium,1.278808,0.25812
7,in catilinam 1,1.122944,0.28929
8,in catilinam 2,4.526056,0.03338
9,in catilinam 3,1.137329,0.28622


Total of 5 Rhythm Categories of Tacitus
===

In [12]:
# Load the per-work clausula counts and drop the 'Unnamed: 0' column
# (presumably a saved index -- confirm against the CSV).
tacitus = pd.read_csv('../data/tacitus_pca.csv').drop(columns=['Unnamed: 0'])

# Collapse the fine-grained clausula columns into five rhythm categories,
# one row per work, and carry the title along as a plain array.
tacitus_total = pd.DataFrame({
    'cretic-trochee': tacitus[[
        'cretic-trochee (-u--x)',
        'cretic-trochee 1 res (uuu--x)',
        'cretic-trochee 1 res (-uuu-x)',
        'cretic-trochee 1 res (-u-uux)',
    ]].sum(axis=1),
    'double-molossus-cretic': tacitus[[
        'double/molossus-cretic pure double-cretic (-u--ux)',
        'double/molossus-cretic pure molossus-cretic (----ux)',
        'double/molossus-cretic 1 res (uuu--ux)',
        'double/molossus-cretic 1 res (-uuu-ux)',
        'double/molossus-cretic 1 res (-u-uuux)',
        'double/molossus-cretic molossus not chor 1 res (uu---ux)',
        'double/molossus-cretic molossus not chor 1 res (--uu-ux)',
        'double/molossus-cretic molossus not chor 1 res (---uuux)',
        'double/molossus-cretic chor res (-uu--ux)',
        'double/molossus-cretic ep res (-u---ux)',
    ]].sum(axis=1),
    'double-trochee': tacitus[[
        'double trochee (-u-x)',
        'double trochee 1 res (uuu-x)',
        'double trochee 1 res (-uuux)',
    ]].sum(axis=1),
    'hypodochmiac': tacitus[[
        'hypodochmiac (-u-ux)',
        'hypodochmiac 1 res (uuu-ux)',
        'hypodochmiac 1 res (-uuuux)',
    ]].sum(axis=1),
    'other': tacitus[[
        'spondaic (---x)',
        'heroic (-uu-x)',
        'first paeon (-uux)',
        'choriamb trochee (-uu--x)',
        'short sequence (uuuuux)',
        'misc_clausulae',
    ]].sum(axis=1),
    'title': tacitus['title'].values,
})

In [13]:
tacitus_total.head()

Unnamed: 0,cretic-trochee,double-molossus-cretic,double-trochee,hypodochmiac,other,title
0,69.0,74.0,83.0,33.0,175.0,agricola
1,106.0,141.0,131.0,44.0,332.0,annales book 1
2,49.0,54.0,58.0,27.0,123.0,annales book 11
3,90.0,69.0,79.0,32.0,199.0,annales book 12
4,65.0,70.0,82.0,28.0,194.0,annales book 13


Chi Square Table of 5 Rhythm Categories for Tacitus
===

In [14]:
tacitus_total_chi = chi_square_corpus(tacitus_total)
tacitus_total_chi

Unnamed: 0,title,chi,p
0,agricola,3.448332,0.48578
1,annales book 1,5.931865,0.20429
2,annales book 11,5.006846,0.2866
3,annales book 12,3.609361,0.46145
4,annales book 13,2.479791,0.64826
5,annales book 14,5.275282,0.2602
6,annales book 15,0.593708,0.96376
7,annales book 16,2.822316,0.58799
8,annales book 2,21.870342,0.00021
9,annales book 3,3.732413,0.44343


Chi Square of Tacitus of all Categories
===

In [15]:
tacitus.head()

Unnamed: 0,title,cretic-trochee (-u--x),cretic-trochee 1 res (uuu--x),cretic-trochee 1 res (-uuu-x),cretic-trochee 1 res (-u-uux),double/molossus-cretic pure double-cretic (-u--ux),double/molossus-cretic pure molossus-cretic (----ux),double/molossus-cretic 1 res (uuu--ux),double/molossus-cretic 1 res (-uuu-ux),double/molossus-cretic 1 res (-u-uuux),...,hypodochmiac (-u-ux),hypodochmiac 1 res (uuu-ux),hypodochmiac 1 res (-uuuux),spondaic (---x),heroic (-uu-x),first paeon (-uux),choriamb trochee (-uu--x),short sequence (uuuuux),misc_clausulae,author
0,agricola,45.0,2.0,9.0,13.0,24.0,15.0,2.0,1.0,2.0,...,29.0,3.0,1.0,95.0,20.0,37.0,17.0,2.0,4.0,tacitus
1,annales book 1,66.0,10.0,8.0,22.0,32.0,34.0,5.0,9.0,4.0,...,35.0,2.0,7.0,184.0,48.0,68.0,25.0,0.0,7.0,tacitus
2,annales book 11,25.0,5.0,6.0,13.0,18.0,8.0,1.0,2.0,2.0,...,20.0,1.0,6.0,74.0,18.0,23.0,7.0,0.0,1.0,tacitus
3,annales book 12,45.0,8.0,16.0,21.0,19.0,15.0,4.0,3.0,3.0,...,27.0,2.0,3.0,111.0,25.0,42.0,14.0,1.0,6.0,tacitus
4,annales book 13,43.0,11.0,6.0,5.0,16.0,11.0,3.0,2.0,6.0,...,21.0,1.0,6.0,115.0,29.0,35.0,11.0,2.0,2.0,tacitus


In [16]:
chi_square_corpus(tacitus.drop(columns=['author']))

Unnamed: 0,title,chi,p
0,agricola,21.384856,0.67097
1,annales book 1,27.335823,0.33932
2,annales book 11,20.865542,0.70008
3,annales book 12,24.965916,0.46429
4,annales book 13,26.085941,0.40302
5,annales book 14,19.856461,0.75431
6,annales book 15,16.903612,0.88526
7,annales book 16,34.536254,0.09693
8,annales book 2,39.904518,0.02983
9,annales book 3,36.426768,0.06537


Rhythmic/Unrhythmic Table of Tacitus
===

In [17]:
# Two-way split per work: the four named rhythm categories pooled as
# 'rhythmic' against the 'other' bucket as 'unrhythmic'.
rhythmic_unrhythmic_tacitus_df = pd.DataFrame({
    'rhythmic': tacitus_total[
        ['cretic-trochee', 'double-molossus-cretic', 'double-trochee', 'hypodochmiac']
    ].sum(axis=1),
    'unrhythmic': tacitus_total['other'].values,
    'title': tacitus_total['title'].values,
})
rhythmic_unrhythmic_tacitus_df.head()

Unnamed: 0,rhythmic,unrhythmic,title
0,259.0,175.0,agricola
1,422.0,332.0,annales book 1
2,188.0,123.0,annales book 11
3,270.0,199.0,annales book 12
4,245.0,194.0,annales book 13


In [18]:
rhythmic_unrhythmic_tacitus_chi = chi_square_corpus(rhythmic_unrhythmic_tacitus_df)
rhythmic_unrhythmic_tacitus_chi

Unnamed: 0,title,chi,p
0,agricola,0.753002,0.38553
1,annales book 1,0.758375,0.38384
2,annales book 11,0.987066,0.32046
3,annales book 12,0.001383,0.97034
4,annales book 13,0.493704,0.48228
5,annales book 14,0.153639,0.69508
6,annales book 15,0.001404,0.97011
7,annales book 16,1.217397,0.26987
8,annales book 2,8.616343,0.00333
9,annales book 3,0.266083,0.60597


Total of 5 Rhythm Categories of Pliny
===

In [19]:
# Load the per-work clausula counts, leave out the row titled 'versus', and
# drop the 'Unnamed: 0' column (presumably a saved index -- confirm).
pliny = (
    pd.read_csv('../data/pliny_pca.csv')
    .loc[lambda frame: frame['title'] != 'versus']
    .drop(columns=['Unnamed: 0'])
)

# Collapse the fine-grained clausula columns into five rhythm categories,
# one row per work, and carry the title along as a plain array.
pliny_total = pd.DataFrame({
    'cretic-trochee': pliny[[
        'cretic-trochee (-u--x)',
        'cretic-trochee 1 res (uuu--x)',
        'cretic-trochee 1 res (-uuu-x)',
        'cretic-trochee 1 res (-u-uux)',
    ]].sum(axis=1),
    'double-molossus-cretic': pliny[[
        'double/molossus-cretic pure double-cretic (-u--ux)',
        'double/molossus-cretic pure molossus-cretic (----ux)',
        'double/molossus-cretic 1 res (uuu--ux)',
        'double/molossus-cretic 1 res (-uuu-ux)',
        'double/molossus-cretic 1 res (-u-uuux)',
        'double/molossus-cretic molossus not chor 1 res (uu---ux)',
        'double/molossus-cretic molossus not chor 1 res (--uu-ux)',
        'double/molossus-cretic molossus not chor 1 res (---uuux)',
        'double/molossus-cretic chor res (-uu--ux)',
        'double/molossus-cretic ep res (-u---ux)',
    ]].sum(axis=1),
    'double-trochee': pliny[[
        'double trochee (-u-x)',
        'double trochee 1 res (uuu-x)',
        'double trochee 1 res (-uuux)',
    ]].sum(axis=1),
    'hypodochmiac': pliny[[
        'hypodochmiac (-u-ux)',
        'hypodochmiac 1 res (uuu-ux)',
        'hypodochmiac 1 res (-uuuux)',
    ]].sum(axis=1),
    'other': pliny[[
        'spondaic (---x)',
        'heroic (-uu-x)',
        'first paeon (-uux)',
        'choriamb trochee (-uu--x)',
        'short sequence (uuuuux)',
        'misc_clausulae',
    ]].sum(axis=1),
    'title': pliny['title'].values,
})

In [20]:
pliny_total.head()

Unnamed: 0,cretic-trochee,double-molossus-cretic,double-trochee,hypodochmiac,other,title
0,39.0,30.0,17.0,15.0,50.0,epistulae 10 trajan
1,183.0,86.0,64.0,21.0,60.0,epistulae 8
2,200.0,98.0,81.0,20.0,97.0,epistulae 9
3,490.0,279.0,250.0,50.0,193.0,panegyricus
5,193.0,114.0,78.0,21.0,74.0,epistulae 4


In [21]:
chi_square_corpus(pliny_total)

Unnamed: 0,title,chi,p
0,epistulae 10 trajan,48.329484,0.0
1,epistulae 8,6.768071,0.14866
2,epistulae 9,5.223296,0.26514
3,panegyricus,4.041099,0.40047
4,epistulae 4,3.266988,0.51418
5,epistulae 5,7.08281,0.13158
6,epistulae 7,4.606982,0.33005
7,epistulae 10 pliny,15.531704,0.00372
8,epistulae 6,0.402412,0.98228
9,epistulae 2,1.576676,0.81298


Chi Square of Pliny for all Categories
===

In [22]:
pliny.head()

Unnamed: 0,title,cretic-trochee (-u--x),cretic-trochee 1 res (uuu--x),cretic-trochee 1 res (-uuu-x),cretic-trochee 1 res (-u-uux),double/molossus-cretic pure double-cretic (-u--ux),double/molossus-cretic pure molossus-cretic (----ux),double/molossus-cretic 1 res (uuu--ux),double/molossus-cretic 1 res (-uuu-ux),double/molossus-cretic 1 res (-u-uuux),...,hypodochmiac (-u-ux),hypodochmiac 1 res (uuu-ux),hypodochmiac 1 res (-uuuux),spondaic (---x),heroic (-uu-x),first paeon (-uux),choriamb trochee (-uu--x),short sequence (uuuuux),misc_clausulae,author
0,epistulae 10 trajan,22.0,6.0,4.0,7.0,10.0,7.0,0.0,2.0,1.0,...,15.0,0.0,0.0,26.0,6.0,14.0,3.0,0.0,1.0,pliny
1,epistulae 8,114.0,11.0,19.0,39.0,46.0,9.0,6.0,8.0,0.0,...,20.0,1.0,0.0,21.0,13.0,11.0,9.0,2.0,4.0,pliny
2,epistulae 9,117.0,18.0,19.0,46.0,53.0,12.0,18.0,2.0,2.0,...,16.0,4.0,0.0,32.0,15.0,16.0,7.0,14.0,13.0,pliny
3,panegyricus,298.0,30.0,60.0,102.0,162.0,29.0,22.0,11.0,8.0,...,45.0,2.0,3.0,101.0,21.0,28.0,24.0,1.0,18.0,pliny
5,epistulae 4,113.0,13.0,21.0,46.0,75.0,12.0,9.0,2.0,1.0,...,20.0,0.0,1.0,30.0,8.0,14.0,8.0,3.0,11.0,pliny


In [23]:
chi_square_corpus(pliny.drop(columns=['author']))

Unnamed: 0,title,chi,p
0,epistulae 10 trajan,107.208419,0.0
1,epistulae 8,32.49593,0.14416
2,epistulae 9,87.359426,0.0
3,panegyricus,29.681086,0.23645
4,epistulae 4,17.630841,0.85765
5,epistulae 5,24.266104,0.50404
6,epistulae 7,20.947681,0.69552
7,epistulae 10 pliny,31.676379,0.16759
8,epistulae 6,23.587813,0.54328
9,epistulae 2,20.952075,0.69528


Rhythmic/Unrhythmic Pliny
===

In [24]:
# Two-way split per work: the four named rhythm categories pooled as
# 'rhythmic' against the 'other' bucket as 'unrhythmic'.
rhythmic_unrhythmic_pliny_df = pd.DataFrame({
    'rhythmic': pliny_total[
        ['cretic-trochee', 'double-molossus-cretic', 'double-trochee', 'hypodochmiac']
    ].sum(axis=1),
    'unrhythmic': pliny_total['other'].values,
    'title': pliny_total['title'].values,
})
rhythmic_unrhythmic_pliny_df.head()

Unnamed: 0,rhythmic,unrhythmic,title
0,101.0,50.0,epistulae 10 trajan
1,354.0,60.0,epistulae 8
2,399.0,97.0,epistulae 9
3,1069.0,193.0,panegyricus
5,406.0,74.0,epistulae 4


In [25]:
chi_square_corpus(rhythmic_unrhythmic_pliny_df)

Unnamed: 0,title,chi,p
0,epistulae 10 trajan,29.45176,0.0
1,epistulae 8,1.221749,0.26902
2,epistulae 9,3.261127,0.07094
3,panegyricus,1.739514,0.1872
4,epistulae 4,0.410015,0.52196
5,epistulae 5,1.974811,0.15994
6,epistulae 7,0.007131,0.9327
7,epistulae 10 pliny,2.367095,0.12392
8,epistulae 6,0.011651,0.91405
9,epistulae 2,0.59006,0.4424


Epistulae Books 1-9 Tests
===

In [26]:
# Keep only the Epistulae books 1-9 rows of the Pliny totals by filtering out
# the Panegyricus and the two 'epistulae 10 ...' entries.
rows_to_exclude = ['epistulae 10 trajan', 'panegyricus', 'epistulae 10 pliny']
pliny_total_epist = pliny_total[~pliny_total['title'].isin(rows_to_exclude)]
pliny_total_epist

Unnamed: 0,cretic-trochee,double-molossus-cretic,double-trochee,hypodochmiac,other,title
1,183.0,86.0,64.0,21.0,60.0,epistulae 8
2,200.0,98.0,81.0,20.0,97.0,epistulae 9
5,193.0,114.0,78.0,21.0,74.0,epistulae 4
6,184.0,120.0,79.0,18.0,66.0,epistulae 5
7,207.0,104.0,80.0,15.0,82.0,epistulae 7
9,215.0,123.0,101.0,26.0,94.0,epistulae 6
10,192.0,100.0,96.0,19.0,73.0,epistulae 2
11,196.0,92.0,119.0,16.0,82.0,epistulae 3
12,185.0,115.0,92.0,22.0,77.0,epistulae 1


In [27]:
chi_square_corpus(pliny_total_epist)

Unnamed: 0,title,chi,p
0,epistulae 8,5.703863,0.22238
1,epistulae 9,6.08032,0.19323
2,epistulae 4,2.246296,0.69056
3,epistulae 5,5.502795,0.23948
4,epistulae 7,3.119654,0.53801
5,epistulae 6,1.184584,0.88063
6,epistulae 2,1.633216,0.80281
7,epistulae 3,14.278306,0.00646
8,epistulae 1,1.957737,0.74353


In [29]:
# Same title filter applied to the rhythmic/unrhythmic table; this reads the
# notebook-level `rows_to_exclude` list, so it depends on which cell last set it.
rhythmic_unrhythmic_pliny_epist_df = rhythmic_unrhythmic_pliny_df.loc[
    ~rhythmic_unrhythmic_pliny_df['title'].isin(rows_to_exclude)
]
rhythmic_unrhythmic_pliny_epist_df

Unnamed: 0,rhythmic,unrhythmic,title
1,354.0,60.0,epistulae 8
2,399.0,97.0,epistulae 9
5,406.0,74.0,epistulae 4
6,401.0,66.0,epistulae 5
7,406.0,82.0,epistulae 7
9,465.0,94.0,epistulae 6
10,407.0,73.0,epistulae 2
11,423.0,82.0,epistulae 3
12,414.0,77.0,epistulae 1


In [30]:
chi_square_corpus(rhythmic_unrhythmic_pliny_epist_df)

Unnamed: 0,title,chi,p
0,epistulae 8,0.743932,0.3884
1,epistulae 9,4.675128,0.0306
2,epistulae 4,0.132,0.71637
3,epistulae 5,1.333422,0.2482
4,epistulae 7,0.14881,0.69968
5,epistulae 6,0.188561,0.66412
6,epistulae 2,0.244968,0.62064
7,epistulae 3,0.000771,0.97784
8,epistulae 1,0.039802,0.84187


Varro Tests
===

In [39]:
# Load the Varro clausula counts and drop the bookkeeping columns so that
# only the title and the individual clausula counts remain.
varro = pd.read_csv('../data/varro_df.csv').drop(columns=[
    'total_clausulae',
    'total_excluded',
    'abbrev_excluded',
    'bracket_excluded',
    'short_excluded',
    'total_artistic',
    'percent_clausulae',
])

# Only these titles are kept for the tests below.
rows_to_include = [
    'de lingua latina book 5',
    'de lingua latina book 6',
    'de lingua latina book 7',
    'de lingua latina book 8',
    'de lingua latina book 9',
    'res rustica book 1',
    'res rustica book 2',
    'res rustica book 3',
]
varro = varro[varro['title'].isin(rows_to_include)]

# Collapse the fine-grained clausula columns into five rhythm categories,
# one row per book, and carry the title along as a plain array.
varro_total = pd.DataFrame({
    'cretic-trochee': varro[[
        'cretic-trochee (-u--x)',
        'cretic-trochee 1 res (uuu--x)',
        'cretic-trochee 1 res (-uuu-x)',
        'cretic-trochee 1 res (-u-uux)',
    ]].sum(axis=1),
    'double-molossus-cretic': varro[[
        'double/molossus-cretic pure double-cretic (-u--ux)',
        'double/molossus-cretic pure molossus-cretic (----ux)',
        'double/molossus-cretic 1 res (uuu--ux)',
        'double/molossus-cretic 1 res (-uuu-ux)',
        'double/molossus-cretic 1 res (-u-uuux)',
        'double/molossus-cretic molossus not chor 1 res (uu---ux)',
        'double/molossus-cretic molossus not chor 1 res (--uu-ux)',
        'double/molossus-cretic molossus not chor 1 res (---uuux)',
        'double/molossus-cretic chor res (-uu--ux)',
        'double/molossus-cretic ep res (-u---ux)',
    ]].sum(axis=1),
    'double-trochee': varro[[
        'double trochee (-u-x)',
        'double trochee 1 res (uuu-x)',
        'double trochee 1 res (-uuux)',
    ]].sum(axis=1),
    'hypodochmiac': varro[[
        'hypodochmiac (-u-ux)',
        'hypodochmiac 1 res (uuu-ux)',
        'hypodochmiac 1 res (-uuuux)',
    ]].sum(axis=1),
    'other': varro[[
        'spondaic (---x)',
        'heroic (-uu-x)',
        'first paeon (-uux)',
        'choriamb trochee (-uu--x)',
        'short sequence (uuuuux)',
        'misc_clausulae',
    ]].sum(axis=1),
    'title': varro['title'].values,
})

In [40]:
varro_total

Unnamed: 0,cretic-trochee,double-molossus-cretic,double-trochee,hypodochmiac,other,title
7,161.0,98.0,179.0,49.0,461.0,de lingua latina book 5
8,72.0,52.0,86.0,24.0,246.0,de lingua latina book 6
9,72.0,85.0,80.0,28.0,284.0,de lingua latina book 7
10,38.0,33.0,52.0,24.0,120.0,de lingua latina book 8
11,119.0,72.0,103.0,50.0,298.0,de lingua latina book 9
21,147.0,60.0,177.0,31.0,393.0,res rustica book 1
22,107.0,68.0,135.0,22.0,340.0,res rustica book 2
23,110.0,35.0,108.0,17.0,278.0,res rustica book 3


In [41]:
chi_square_corpus(varro_total)

Unnamed: 0,title,chi,p
0,de lingua latina book 5,0.219121,0.99442
1,de lingua latina book 6,1.839035,0.76533
2,de lingua latina book 7,28.114991,1e-05
3,de lingua latina book 8,12.580828,0.01352
4,de lingua latina book 9,17.663949,0.00144
5,res rustica book 1,16.432805,0.00249
6,res rustica book 2,6.014941,0.19804
7,res rustica book 3,17.82126,0.00134


In [42]:
# Two-way split per book: the four named rhythm categories pooled as
# 'rhythmic' against the 'other' bucket as 'unrhythmic'.
rhythmic_unrhythmic_varro_df = pd.DataFrame({
    'rhythmic': varro_total[
        ['cretic-trochee', 'double-molossus-cretic', 'double-trochee', 'hypodochmiac']
    ].sum(axis=1),
    'unrhythmic': varro_total['other'].values,
    'title': varro_total['title'].values,
})
rhythmic_unrhythmic_varro_df.head()

Unnamed: 0,rhythmic,unrhythmic,title
7,487.0,461.0,de lingua latina book 5
8,234.0,246.0,de lingua latina book 6
9,265.0,284.0,de lingua latina book 7
10,147.0,120.0,de lingua latina book 8
11,344.0,298.0,de lingua latina book 9


In [43]:
chi_square_corpus(rhythmic_unrhythmic_varro_df)

Unnamed: 0,title,chi,p
0,de lingua latina book 5,0.150346,0.69821
1,de lingua latina book 6,0.767344,0.38104
2,de lingua latina book 7,1.415178,0.2342
3,de lingua latina book 8,1.913706,0.16655
4,de lingua latina book 9,2.237209,0.13472
5,res rustica book 1,0.115572,0.73389
6,res rustica book 2,0.50534,0.47716
7,res rustica book 3,0.477906,0.48937


Caesar Tests
===

In [50]:
# Load the Caesar clausula counts and drop the bookkeeping columns so that
# only the title and the individual clausula counts remain.
caesar = pd.read_csv('../data/caesar_df.csv').drop(columns=[
    'total_clausulae',
    'total_excluded',
    'abbrev_excluded',
    'bracket_excluded',
    'short_excluded',
    'total_artistic',
    'percent_clausulae',
])

# NOTE(review): this rebinds the notebook-level `rows_to_exclude`, which the
# Pliny Epistulae cells also read; re-running those cells after this one
# would filter on the Caesar titles instead.
rows_to_exclude = [
    'anticato',
    'bellum civile',
    'carmina',
    'de analogia',
    'epistulae ad ciceronem',
    'epistulae ad familiares',
    'orationes',
    'de bello gallico',
    'de bello gallico book 8',
]
caesar = caesar[~caesar['title'].isin(rows_to_exclude)]

# Collapse the fine-grained clausula columns into five rhythm categories,
# one row per book, and carry the title along as a plain array.
caesar_total = pd.DataFrame({
    'cretic-trochee': caesar[[
        'cretic-trochee (-u--x)',
        'cretic-trochee 1 res (uuu--x)',
        'cretic-trochee 1 res (-uuu-x)',
        'cretic-trochee 1 res (-u-uux)',
    ]].sum(axis=1),
    'double-molossus-cretic': caesar[[
        'double/molossus-cretic pure double-cretic (-u--ux)',
        'double/molossus-cretic pure molossus-cretic (----ux)',
        'double/molossus-cretic 1 res (uuu--ux)',
        'double/molossus-cretic 1 res (-uuu-ux)',
        'double/molossus-cretic 1 res (-u-uuux)',
        'double/molossus-cretic molossus not chor 1 res (uu---ux)',
        'double/molossus-cretic molossus not chor 1 res (--uu-ux)',
        'double/molossus-cretic molossus not chor 1 res (---uuux)',
        'double/molossus-cretic chor res (-uu--ux)',
        'double/molossus-cretic ep res (-u---ux)',
    ]].sum(axis=1),
    'double-trochee': caesar[[
        'double trochee (-u-x)',
        'double trochee 1 res (uuu-x)',
        'double trochee 1 res (-uuux)',
    ]].sum(axis=1),
    'hypodochmiac': caesar[[
        'hypodochmiac (-u-ux)',
        'hypodochmiac 1 res (uuu-ux)',
        'hypodochmiac 1 res (-uuuux)',
    ]].sum(axis=1),
    'other': caesar[[
        'spondaic (---x)',
        'heroic (-uu-x)',
        'first paeon (-uux)',
        'choriamb trochee (-uu--x)',
        'short sequence (uuuuux)',
        'misc_clausulae',
    ]].sum(axis=1),
    'title': caesar['title'].values,
})

In [51]:
caesar

Unnamed: 0,title,cretic-trochee (-u--x),cretic-trochee 1 res (uuu--x),cretic-trochee 1 res (-uuu-x),cretic-trochee 1 res (-u-uux),double/molossus-cretic pure double-cretic (-u--ux),double/molossus-cretic pure molossus-cretic (----ux),double/molossus-cretic 1 res (uuu--ux),double/molossus-cretic 1 res (-uuu-ux),double/molossus-cretic 1 res (-u-uuux),...,double trochee 1 res (-uuux),hypodochmiac (-u-ux),hypodochmiac 1 res (uuu-ux),hypodochmiac 1 res (-uuuux),spondaic (---x),heroic (-uu-x),first paeon (-uux),choriamb trochee (-uu--x),short sequence (uuuuux),misc_clausulae
2,bellum civile book 1,106.0,15.0,13.0,19.0,36.0,31.0,11.0,1.0,6.0,...,4.0,30.0,3.0,3.0,127.0,22.0,50.0,22.0,1.0,4.0
3,bellum civile book 2,58.0,11.0,11.0,5.0,16.0,23.0,3.0,1.0,5.0,...,2.0,12.0,2.0,3.0,73.0,20.0,29.0,4.0,3.0,4.0
4,bellum civile book 3,89.0,22.0,21.0,14.0,29.0,39.0,9.0,2.0,4.0,...,12.0,29.0,2.0,3.0,162.0,44.0,40.0,20.0,5.0,7.0
8,de bello gallico book 1,52.0,6.0,14.0,8.0,9.0,18.0,1.0,1.0,3.0,...,8.0,8.0,0.0,5.0,110.0,28.0,30.0,16.0,2.0,2.0
9,de bello gallico book 2,27.0,3.0,6.0,3.0,7.0,11.0,0.0,0.0,2.0,...,4.0,9.0,0.0,2.0,57.0,7.0,8.0,4.0,1.0,0.0
10,de bello gallico book 3,16.0,1.0,7.0,1.0,5.0,3.0,1.0,1.0,1.0,...,0.0,7.0,0.0,1.0,45.0,7.0,5.0,3.0,0.0,1.0
11,de bello gallico book 4,32.0,2.0,6.0,5.0,6.0,12.0,1.0,0.0,1.0,...,4.0,7.0,0.0,0.0,53.0,10.0,12.0,11.0,1.0,3.0
12,de bello gallico book 5,70.0,10.0,7.0,17.0,20.0,23.0,3.0,0.0,0.0,...,9.0,22.0,2.0,2.0,91.0,10.0,29.0,5.0,3.0,3.0
13,de bello gallico book 6,54.0,5.0,8.0,9.0,18.0,18.0,2.0,1.0,4.0,...,5.0,9.0,2.0,1.0,54.0,8.0,21.0,6.0,2.0,2.0
14,de bello gallico book 7,79.0,12.0,18.0,20.0,24.0,43.0,3.0,1.0,2.0,...,6.0,19.0,1.0,1.0,155.0,26.0,47.0,22.0,6.0,9.0


In [52]:
chi_square_corpus(caesar_total)

Unnamed: 0,title,chi,p
0,bellum civile book 1,6.9217,0.14008
1,bellum civile book 2,4.123356,0.38957
2,bellum civile book 3,1.172055,0.88268
3,de bello gallico book 1,26.466583,3e-05
4,de bello gallico book 2,3.299387,0.50903
5,de bello gallico book 3,8.571934,0.07274
6,de bello gallico book 4,3.1432,0.53415
7,de bello gallico book 5,9.209554,0.05607
8,de bello gallico book 6,12.784196,0.01238
9,de bello gallico book 7,5.930376,0.20441


In [53]:
# Two-way split per book: the four named rhythm categories pooled as
# 'rhythmic' against the 'other' bucket as 'unrhythmic'.
rhythmic_unrhythmic_caesar_df = pd.DataFrame({
    'rhythmic': caesar_total[
        ['cretic-trochee', 'double-molossus-cretic', 'double-trochee', 'hypodochmiac']
    ].sum(axis=1),
    'unrhythmic': caesar_total['other'].values,
    'title': caesar_total['title'].values,
})
rhythmic_unrhythmic_caesar_df.head()

Unnamed: 0,rhythmic,unrhythmic,title
2,461.0,226.0,bellum civile book 1
3,241.0,133.0,bellum civile book 2
4,463.0,278.0,bellum civile book 3
8,272.0,188.0,de bello gallico book 1
9,114.0,77.0,de bello gallico book 2


In [54]:
chi_square_corpus(rhythmic_unrhythmic_caesar_df)

Unnamed: 0,title,chi,p
0,bellum civile book 1,3.995945,0.04561
1,bellum civile book 2,0.071317,0.78943
2,bellum civile book 3,0.486002,0.48572
3,de bello gallico book 1,4.376196,0.03644
4,de bello gallico book 2,1.197931,0.27374
5,de bello gallico book 3,0.50789,0.47605
6,de bello gallico book 4,2.217653,0.13644
7,de bello gallico book 5,2.899216,0.08862
8,de bello gallico book 6,8.461044,0.00363
9,de bello gallico book 7,1.586164,0.20788
