In [12]:
from scipy import stats
import numpy as np
import pandas as pd

Test of SciPy's Chi-Square Implementation
===

In [13]:
# Sanity check: feed two hand-entered rows of category counts to scipy as a 2x5
# contingency table; the cell output shows the statistic, p-value, dof and expected table.
e = np.array((1755, 952, 790, 178, 705))
o = np.array((490, 279, 250, 50, 193))
stats.chi2_contingency(np.vstack((e, o)))

(2.5348285580374093,
 0.6384102476401078,
 4,
 array([[1742.83941865,  955.65047855,  807.37327189,  177.00106345,
          697.13576746],
        [ 502.16058135,  275.34952145,  232.62672811,   50.99893655,
          200.86423254]]))

Totals of 5 Rhythm Categories for Cicero's Speeches
===

In [19]:
# Load the per-speech clausula counts and drop the 'Unnamed: 0' column
# (presumably an index column saved with the CSV -- confirm against the export step).
cicero_speeches = pd.read_csv('../data/cicero_speeches_pca.csv').drop(columns=['Unnamed: 0'])

# Collapse the detailed clausula columns into five broad rhythm categories (row-wise sums),
# keeping the speech title alongside the totals.
cicero_speeches_total = pd.DataFrame({
    'cretic-trochee': cicero_speeches[[
        'cretic-trochee (-u--x)',
        'cretic-trochee 1 res (uuu--x)',
        'cretic-trochee 1 res (-uuu-x)',
        'cretic-trochee 1 res (-u-uux)',
    ]].sum(axis=1),
    'double-molossus-cretic': cicero_speeches[[
        'double/molossus-cretic pure double-cretic (-u--ux)',
        'double/molossus-cretic pure molossus-cretic (----ux)',
        'double/molossus-cretic 1 res (uuu--ux)',
        'double/molossus-cretic 1 res (-uuu-ux)',
        'double/molossus-cretic 1 res (-u-uuux)',
        'double/molossus-cretic molossus not chor 1 res (uu---ux)',
        'double/molossus-cretic molossus not chor 1 res (--uu-ux)',
        'double/molossus-cretic molossus not chor 1 res (---uuux)',
        'double/molossus-cretic chor res (-uu--ux)',
        'double/molossus-cretic ep res (-u---ux)',
    ]].sum(axis=1),
    'double-trochee': cicero_speeches[[
        'double trochee (-u-x)',
        'double trochee 1 res (uuu-x)',
        'double trochee 1 res (-uuux)',
    ]].sum(axis=1),
    'hypodochmiac': cicero_speeches[[
        'hypodochmiac (-u-ux)',
        'hypodochmiac 1 res (uuu-ux)',
        'hypodochmiac 1 res (-uuuux)',
    ]].sum(axis=1),
    'other': cicero_speeches[[
        'spondaic (---x)',
        'heroic (-uu-x)',
        'first paeon (-uux)',
        'choriamb trochee (-uu--x)',
        'short sequence (uuuuux)',
        'misc_clausulae',
    ]].sum(axis=1),
    'title': cicero_speeches['title'].values,
})

In [20]:
# Preview the first five rows of the aggregated Cicero totals.
cicero_speeches_total.head(n=5)

Unnamed: 0,cretic-trochee,double-molossus-cretic,double-trochee,hypodochmiac,other,title
0,215,146,236,45,102,de domo sua
1,136,78,116,13,59,de haruspicum responso
2,35,33,44,11,19,de lege agraria 1
3,175,117,137,39,96,de lege agraria 2
4,21,24,17,5,16,de lege agraria 3


Method for Generating Chi Square Results
===

In [21]:
def chi_square_corpus(df):
    """Run a one-vs-rest chi-square test of independence for every title in ``df``.

    For each entry of ``df['title']``, the row(s) carrying that title are stacked on
    top of a single row holding the column-wise sum of all remaining rows, and the
    resulting contingency table is passed to ``scipy.stats.chi2_contingency``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``'title'`` column. Every other column is treated as a count
        category and is expected to hold numeric counts.

    Returns
    -------
    pandas.DataFrame
        One row per entry of ``df['title']`` (original order) with columns
        ``'title'``, ``'chi'`` (test statistic) and ``'p'`` (p-value rounded to
        5 decimal places).

    Notes
    -----
    ``chi2_contingency`` is called with its defaults, so per the scipy documentation
    Yates' continuity correction is applied when the table has one degree of freedom
    (i.e. only two count columns). Any ``ValueError`` raised by scipy (e.g. a zero
    expected frequency) propagates to the caller.
    """
    titles = df['title'].values
    # Loop-invariant: the count columns do not depend on which title is being tested.
    counts = df.drop(columns=['title'])
    chis = []
    ps = []
    for t in titles:
        # Positional boolean mask of the rows that belong to the title under test.
        is_target = (df['title'] == t).to_numpy()
        speech = counts.loc[is_target]
        # Collapse every other row into a single "rest of corpus" row of totals.
        rest_of_speeches = counts.loc[~is_target].sum(axis=0).to_frame().T
        # DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent.
        test_df = pd.concat([speech, rest_of_speeches])
        chi, p, _dof, _expected = stats.chi2_contingency(test_df)
        chis.append(chi)
        ps.append(p)
    return pd.DataFrame({
        'title': titles,
        'chi': chis,
        'p': [round(p, 5) for p in ps],
    })

Chi Square Table of 5 Rhythm Categories for Cicero's Speeches
===

In [22]:
# One-vs-rest chi-square per speech over the five aggregated rhythm categories.
cicero_totaled_chi_df = chi_square_corpus(df=cicero_speeches_total)

In [23]:
# Display the per-speech chi-square statistics and p-values held in cicero_totaled_chi_df.
cicero_totaled_chi_df

Unnamed: 0,title,chi,p
0,de domo sua,23.330701,0.00011
1,de haruspicum responso,14.462578,0.00596
2,de lege agraria 1,5.25535,0.26209
3,de lege agraria 2,8.543234,0.07359
4,de lege agraria 3,2.355887,0.67061
5,de provinciis consularibus,19.136572,0.00074
6,in caecilium,4.302021,0.36667
7,in catilinam 1,12.122659,0.01646
8,in catilinam 2,10.68804,0.0303
9,in catilinam 3,11.604888,0.02054


Chi Square of Cicero Speeches for all Categories
===

In [35]:
# Preview the first five rows of the full (un-aggregated) Cicero clausula counts.
cicero_speeches.head(n=5)

Unnamed: 0,title,cretic-trochee (-u--x),cretic-trochee 1 res (uuu--x),cretic-trochee 1 res (-uuu-x),cretic-trochee 1 res (-u-uux),double/molossus-cretic pure double-cretic (-u--ux),double/molossus-cretic pure molossus-cretic (----ux),double/molossus-cretic 1 res (uuu--ux),double/molossus-cretic 1 res (-uuu-ux),double/molossus-cretic 1 res (-u-uuux),...,hypodochmiac (-u-ux),hypodochmiac 1 res (uuu-ux),hypodochmiac 1 res (-uuuux),spondaic (---x),heroic (-uu-x),first paeon (-uux),choriamb trochee (-uu--x),short sequence (uuuuux),misc_clausulae,author
0,de domo sua,137,16,51,11,61,35,11,4,1,...,42,2,1,52,13,14,5,1,17,cicero
1,de haruspicum responso,82,8,37,9,26,26,4,2,1,...,12,0,1,41,11,2,2,1,2,cicero
2,de lege agraria 1,23,2,6,4,12,13,2,0,0,...,10,1,0,13,0,1,0,0,5,cicero
3,de lege agraria 2,131,8,26,10,35,38,4,3,2,...,38,1,0,49,15,16,5,0,11,cicero
4,de lege agraria 3,13,2,3,3,11,9,1,0,0,...,4,1,0,9,1,1,0,0,5,cicero


In [37]:
# Chi-square over every individual clausula column; 'author' is removed first so that
# only 'title' and the count columns reach chi_square_corpus.
chi_square_corpus(df=cicero_speeches.drop(columns='author'))

Unnamed: 0,title,chi,p
0,de domo sua,40.957185,0.0232
1,de haruspicum responso,42.039523,0.0178
2,de lege agraria 1,27.379938,0.33718
3,de lege agraria 2,30.631379,0.20152
4,de lege agraria 3,28.830866,0.27105
5,de provinciis consularibus,48.669721,0.0031
6,in caecilium,18.159192,0.83556
7,in catilinam 1,41.21325,0.0218
8,in catilinam 2,30.848473,0.19409
9,in catilinam 3,27.586805,0.32722


Rhythmic/Unrhythmic Table of Cicero's Speeches
===

In [25]:
# Fold the four named rhythm categories into a single 'rhythmic' count per speech;
# the 'other' column is carried over as 'unrhythmic'.
rhythmic_unrhythmic_cicero_speeches_df = pd.DataFrame({
    'rhythmic': cicero_speeches_total[
        ['cretic-trochee', 'double-molossus-cretic', 'double-trochee', 'hypodochmiac']
    ].sum(axis=1),
    'unrhythmic': cicero_speeches_total['other'].values,
    'title': cicero_speeches_total['title'].values,
})
rhythmic_unrhythmic_cicero_speeches_df.head(n=5)

Unnamed: 0,rhythmic,unrhythmic,title
0,642,102,de domo sua
1,343,59,de haruspicum responso
2,123,19,de lege agraria 1
3,468,96,de lege agraria 2
4,67,16,de lege agraria 3


In [26]:
# One-vs-rest chi-square per speech on the two-column rhythmic/unrhythmic counts.
rhythmic_unrhythmic_cicero_speeches_chi = chi_square_corpus(
    df=rhythmic_unrhythmic_cicero_speeches_df
)
rhythmic_unrhythmic_cicero_speeches_chi

Unnamed: 0,title,chi,p
0,de domo sua,6.916403,0.00854
1,de haruspicum responso,1.886225,0.16963
2,de lege agraria 1,1.320256,0.25055
3,de lege agraria 2,0.028352,0.86628
4,de lege agraria 3,0.097904,0.75436
5,de provinciis consularibus,7.665223,0.00563
6,in caecilium,1.278808,0.25812
7,in catilinam 1,1.122944,0.28929
8,in catilinam 2,4.526056,0.03338
9,in catilinam 3,1.137329,0.28622


Totals of 5 Rhythm Categories for Tacitus
===

In [29]:
# Load the per-work clausula counts and drop the 'Unnamed: 0' column
# (presumably an index column saved with the CSV -- confirm against the export step).
tacitus = pd.read_csv('../data/tacitus_pca.csv').drop(columns=['Unnamed: 0'])

# Collapse the detailed clausula columns into five broad rhythm categories (row-wise sums),
# keeping the work title alongside the totals.
tacitus_total = pd.DataFrame({
    'cretic-trochee': tacitus[[
        'cretic-trochee (-u--x)',
        'cretic-trochee 1 res (uuu--x)',
        'cretic-trochee 1 res (-uuu-x)',
        'cretic-trochee 1 res (-u-uux)',
    ]].sum(axis=1),
    'double-molossus-cretic': tacitus[[
        'double/molossus-cretic pure double-cretic (-u--ux)',
        'double/molossus-cretic pure molossus-cretic (----ux)',
        'double/molossus-cretic 1 res (uuu--ux)',
        'double/molossus-cretic 1 res (-uuu-ux)',
        'double/molossus-cretic 1 res (-u-uuux)',
        'double/molossus-cretic molossus not chor 1 res (uu---ux)',
        'double/molossus-cretic molossus not chor 1 res (--uu-ux)',
        'double/molossus-cretic molossus not chor 1 res (---uuux)',
        'double/molossus-cretic chor res (-uu--ux)',
        'double/molossus-cretic ep res (-u---ux)',
    ]].sum(axis=1),
    'double-trochee': tacitus[[
        'double trochee (-u-x)',
        'double trochee 1 res (uuu-x)',
        'double trochee 1 res (-uuux)',
    ]].sum(axis=1),
    'hypodochmiac': tacitus[[
        'hypodochmiac (-u-ux)',
        'hypodochmiac 1 res (uuu-ux)',
        'hypodochmiac 1 res (-uuuux)',
    ]].sum(axis=1),
    'other': tacitus[[
        'spondaic (---x)',
        'heroic (-uu-x)',
        'first paeon (-uux)',
        'choriamb trochee (-uu--x)',
        'short sequence (uuuuux)',
        'misc_clausulae',
    ]].sum(axis=1),
    'title': tacitus['title'].values,
})

In [30]:
# Preview the first five rows of the aggregated Tacitus totals.
tacitus_total.head(n=5)

Unnamed: 0,cretic-trochee,double-molossus-cretic,double-trochee,hypodochmiac,other,title
0,69.0,74.0,83.0,33.0,175.0,agricola
1,106.0,141.0,131.0,44.0,332.0,annales book 1
2,49.0,54.0,58.0,27.0,123.0,annales book 11
3,90.0,69.0,79.0,32.0,199.0,annales book 12
4,65.0,70.0,82.0,28.0,194.0,annales book 13


Chi Square Table of 5 Rhythm Categories for Tacitus
===

In [31]:
# One-vs-rest chi-square per work over the five aggregated rhythm categories.
tacitus_total_chi = chi_square_corpus(df=tacitus_total)
tacitus_total_chi

Unnamed: 0,title,chi,p
0,agricola,3.448332,0.48578
1,annales book 1,5.931865,0.20429
2,annales book 11,5.006846,0.2866
3,annales book 12,3.609361,0.46145
4,annales book 13,2.479791,0.64826
5,annales book 14,5.275282,0.2602
6,annales book 15,0.593708,0.96376
7,annales book 16,2.822316,0.58799
8,annales book 2,21.870342,0.00021
9,annales book 3,3.732413,0.44343


Chi Square of Tacitus of all Categories
===

In [38]:
# Preview the first five rows of the full (un-aggregated) Tacitus clausula counts.
tacitus.head(n=5)

Unnamed: 0,title,cretic-trochee (-u--x),cretic-trochee 1 res (uuu--x),cretic-trochee 1 res (-uuu-x),cretic-trochee 1 res (-u-uux),double/molossus-cretic pure double-cretic (-u--ux),double/molossus-cretic pure molossus-cretic (----ux),double/molossus-cretic 1 res (uuu--ux),double/molossus-cretic 1 res (-uuu-ux),double/molossus-cretic 1 res (-u-uuux),...,hypodochmiac (-u-ux),hypodochmiac 1 res (uuu-ux),hypodochmiac 1 res (-uuuux),spondaic (---x),heroic (-uu-x),first paeon (-uux),choriamb trochee (-uu--x),short sequence (uuuuux),misc_clausulae,author
0,agricola,45.0,2.0,9.0,13.0,24.0,15.0,2.0,1.0,2.0,...,29.0,3.0,1.0,95.0,20.0,37.0,17.0,2.0,4.0,tacitus
1,annales book 1,66.0,10.0,8.0,22.0,32.0,34.0,5.0,9.0,4.0,...,35.0,2.0,7.0,184.0,48.0,68.0,25.0,0.0,7.0,tacitus
2,annales book 11,25.0,5.0,6.0,13.0,18.0,8.0,1.0,2.0,2.0,...,20.0,1.0,6.0,74.0,18.0,23.0,7.0,0.0,1.0,tacitus
3,annales book 12,45.0,8.0,16.0,21.0,19.0,15.0,4.0,3.0,3.0,...,27.0,2.0,3.0,111.0,25.0,42.0,14.0,1.0,6.0,tacitus
4,annales book 13,43.0,11.0,6.0,5.0,16.0,11.0,3.0,2.0,6.0,...,21.0,1.0,6.0,115.0,29.0,35.0,11.0,2.0,2.0,tacitus


In [40]:
# Chi-square over every individual clausula column; 'author' is removed first so that
# only 'title' and the count columns reach chi_square_corpus.
chi_square_corpus(df=tacitus.drop(columns='author'))

Unnamed: 0,title,chi,p
0,agricola,21.384856,0.67097
1,annales book 1,27.335823,0.33932
2,annales book 11,20.865542,0.70008
3,annales book 12,24.965916,0.46429
4,annales book 13,26.085941,0.40302
5,annales book 14,19.856461,0.75431
6,annales book 15,16.903612,0.88526
7,annales book 16,34.536254,0.09693
8,annales book 2,39.904518,0.02983
9,annales book 3,36.426768,0.06537


Rhythmic/Unrhythmic Table of Tacitus
===

In [33]:
# Fold the four named rhythm categories into a single 'rhythmic' count per work;
# the 'other' column is carried over as 'unrhythmic'.
rhythmic_unrhythmic_tacitus_df = pd.DataFrame({
    'rhythmic': tacitus_total[
        ['cretic-trochee', 'double-molossus-cretic', 'double-trochee', 'hypodochmiac']
    ].sum(axis=1),
    'unrhythmic': tacitus_total['other'].values,
    'title': tacitus_total['title'].values,
})
rhythmic_unrhythmic_tacitus_df.head(n=5)

Unnamed: 0,rhythmic,unrhythmic,title
0,259.0,175.0,agricola
1,422.0,332.0,annales book 1
2,188.0,123.0,annales book 11
3,270.0,199.0,annales book 12
4,245.0,194.0,annales book 13


In [34]:
# One-vs-rest chi-square per work on the two-column rhythmic/unrhythmic counts.
rhythmic_unrhythmic_tacitus_chi = chi_square_corpus(
    df=rhythmic_unrhythmic_tacitus_df
)
rhythmic_unrhythmic_tacitus_chi

Unnamed: 0,title,chi,p
0,agricola,0.753002,0.38553
1,annales book 1,0.758375,0.38384
2,annales book 11,0.987066,0.32046
3,annales book 12,0.001383,0.97034
4,annales book 13,0.493704,0.48228
5,annales book 14,0.153639,0.69508
6,annales book 15,0.001404,0.97011
7,annales book 16,1.217397,0.26987
8,annales book 2,8.616343,0.00333
9,annales book 3,0.266083,0.60597


Totals of 5 Rhythm Categories for Pliny
===

In [42]:
# Load the per-work clausula counts and drop the 'Unnamed: 0' column
# (presumably an index column saved with the CSV -- confirm against the export step).
pliny = pd.read_csv('../data/pliny_pca.csv').drop(columns=['Unnamed: 0'])

# Collapse the detailed clausula columns into five broad rhythm categories (row-wise sums),
# keeping the work title alongside the totals.
pliny_total = pd.DataFrame({
    'cretic-trochee': pliny[[
        'cretic-trochee (-u--x)',
        'cretic-trochee 1 res (uuu--x)',
        'cretic-trochee 1 res (-uuu-x)',
        'cretic-trochee 1 res (-u-uux)',
    ]].sum(axis=1),
    'double-molossus-cretic': pliny[[
        'double/molossus-cretic pure double-cretic (-u--ux)',
        'double/molossus-cretic pure molossus-cretic (----ux)',
        'double/molossus-cretic 1 res (uuu--ux)',
        'double/molossus-cretic 1 res (-uuu-ux)',
        'double/molossus-cretic 1 res (-u-uuux)',
        'double/molossus-cretic molossus not chor 1 res (uu---ux)',
        'double/molossus-cretic molossus not chor 1 res (--uu-ux)',
        'double/molossus-cretic molossus not chor 1 res (---uuux)',
        'double/molossus-cretic chor res (-uu--ux)',
        'double/molossus-cretic ep res (-u---ux)',
    ]].sum(axis=1),
    'double-trochee': pliny[[
        'double trochee (-u-x)',
        'double trochee 1 res (uuu-x)',
        'double trochee 1 res (-uuux)',
    ]].sum(axis=1),
    'hypodochmiac': pliny[[
        'hypodochmiac (-u-ux)',
        'hypodochmiac 1 res (uuu-ux)',
        'hypodochmiac 1 res (-uuuux)',
    ]].sum(axis=1),
    'other': pliny[[
        'spondaic (---x)',
        'heroic (-uu-x)',
        'first paeon (-uux)',
        'choriamb trochee (-uu--x)',
        'short sequence (uuuuux)',
        'misc_clausulae',
    ]].sum(axis=1),
    'title': pliny['title'].values,
})

In [43]:
# Preview the first five rows of the aggregated Pliny totals.
pliny_total.head(n=5)

Unnamed: 0,cretic-trochee,double-molossus-cretic,double-trochee,hypodochmiac,other,title
0,39.0,30.0,17.0,15.0,50.0,epistulae 10 trajan
1,183.0,86.0,64.0,21.0,60.0,epistulae 8
2,200.0,98.0,81.0,20.0,97.0,epistulae 9
3,490.0,279.0,250.0,50.0,193.0,panegyricus
4,1.0,1.0,0.0,0.0,3.0,versus


In [44]:
# One-vs-rest chi-square per work over the five aggregated rhythm categories.
chi_square_corpus(df=pliny_total)

Unnamed: 0,title,chi,p
0,epistulae 10 trajan,48.161174,0.0
1,epistulae 8,6.808644,0.14635
2,epistulae 9,5.132933,0.27393
3,panegyricus,4.14611,0.38659
4,versus,7.294109,0.12114
5,epistulae 4,3.279842,0.51213
6,epistulae 5,7.118238,0.12977
7,epistulae 7,4.589673,0.33205
8,epistulae 10 pliny,15.505164,0.00376
9,epistulae 6,0.392389,0.9831


Chi Square of Pliny for all Categories
===

In [45]:
# Preview the first five rows of the full (un-aggregated) Pliny clausula counts.
pliny.head(n=5)

Unnamed: 0,title,cretic-trochee (-u--x),cretic-trochee 1 res (uuu--x),cretic-trochee 1 res (-uuu-x),cretic-trochee 1 res (-u-uux),double/molossus-cretic pure double-cretic (-u--ux),double/molossus-cretic pure molossus-cretic (----ux),double/molossus-cretic 1 res (uuu--ux),double/molossus-cretic 1 res (-uuu-ux),double/molossus-cretic 1 res (-u-uuux),...,hypodochmiac (-u-ux),hypodochmiac 1 res (uuu-ux),hypodochmiac 1 res (-uuuux),spondaic (---x),heroic (-uu-x),first paeon (-uux),choriamb trochee (-uu--x),short sequence (uuuuux),misc_clausulae,author
0,epistulae 10 trajan,22.0,6.0,4.0,7.0,10.0,7.0,0.0,2.0,1.0,...,15.0,0.0,0.0,26.0,6.0,14.0,3.0,0.0,1.0,pliny
1,epistulae 8,114.0,11.0,19.0,39.0,46.0,9.0,6.0,8.0,0.0,...,20.0,1.0,0.0,21.0,13.0,11.0,9.0,2.0,4.0,pliny
2,epistulae 9,117.0,18.0,19.0,46.0,53.0,12.0,18.0,2.0,2.0,...,16.0,4.0,0.0,32.0,15.0,16.0,7.0,14.0,13.0,pliny
3,panegyricus,298.0,30.0,60.0,102.0,162.0,29.0,22.0,11.0,8.0,...,45.0,2.0,3.0,101.0,21.0,28.0,24.0,1.0,18.0,pliny
4,versus,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,2.0,0.0,0.0,0.0,pliny


In [46]:
# Chi-square over every individual clausula column; 'author' is removed first so that
# only 'title' and the count columns reach chi_square_corpus.
chi_square_corpus(df=pliny.drop(columns='author'))

Unnamed: 0,title,chi,p
0,epistulae 10 trajan,106.648796,0.0
1,epistulae 8,32.449252,0.14542
2,epistulae 9,87.284535,0.0
3,panegyricus,30.028933,0.22321
4,versus,42.28396,0.01675
5,epistulae 4,17.656231,0.85663
6,epistulae 5,24.430674,0.49461
7,epistulae 7,20.911094,0.69756
8,epistulae 10 pliny,31.586426,0.17033
9,epistulae 6,23.532935,0.54647


Rhythmic/Unrhythmic Table of Pliny
===

In [47]:
# Fold the four named rhythm categories into a single 'rhythmic' count per work;
# the 'other' column is carried over as 'unrhythmic'.
rhythmic_unrhythmic_pliny_df = pd.DataFrame({
    'rhythmic': pliny_total[
        ['cretic-trochee', 'double-molossus-cretic', 'double-trochee', 'hypodochmiac']
    ].sum(axis=1),
    'unrhythmic': pliny_total['other'].values,
    'title': pliny_total['title'].values,
})
rhythmic_unrhythmic_pliny_df.head(n=5)

Unnamed: 0,rhythmic,unrhythmic,title
0,101.0,50.0,epistulae 10 trajan
1,354.0,60.0,epistulae 8
2,399.0,97.0,epistulae 9
3,1069.0,193.0,panegyricus
4,2.0,3.0,versus


In [48]:
# One-vs-rest chi-square per work on the two-column rhythmic/unrhythmic counts.
chi_square_corpus(df=rhythmic_unrhythmic_pliny_df)

Unnamed: 0,title,chi,p
0,epistulae 10 trajan,29.274457,0.0
1,epistulae 8,1.26381,0.26093
2,epistulae 9,3.176966,0.07468
3,panegyricus,1.836395,0.17537
4,versus,4.032413,0.04463
5,epistulae 4,0.437236,0.50846
6,epistulae 5,2.03121,0.1541
7,epistulae 7,0.003932,0.95
8,epistulae 10 pliny,2.303399,0.12909
9,epistulae 6,0.007141,0.93266
