In [61]:
import pandas as pd
import numpy as np
from scipy import stats

# Cicero Speeches Chi Square Results

In [62]:
cicero_df = pd.read_csv('../data/cicero_df.csv')
cicero_df.head()

Unnamed: 0,title,total_clausulae,total_excluded,abbrev_excluded,bracket_excluded,short_excluded,cretic-trochee (-u--x),cretic-trochee 1 res (uuu--x),cretic-trochee 1 res (-uuu-x),cretic-trochee 1 res (-u-uux),...,hypodochmiac 1 res (-uuuux),spondaic (---x),heroic (-uu-x),first paeon (-uux),choriamb trochee (-uu--x),short sequence (uuuuux),total_artistic,misc_clausulae,total_unartistic,percent_clausulae
0,academica,228,8,0,5,2,32.0,5.0,11.0,7.0,...,1.0,20.0,7.0,7.0,4.0,1.0,178.0,3.0,42.0,0.822727
1,arati phaenomena,250,19,0,19,0,6.0,0.0,3.0,0.0,...,0.0,9.0,185.0,8.0,2.0,0.0,27.0,0.0,204.0,0.116883
2,arati prognotica,9,0,0,0,0,0.0,0.0,0.0,0.0,...,0.0,0.0,9.0,0.0,0.0,0.0,0.0,0.0,9.0,0.0
3,brutus,1655,146,74,12,53,210.0,28.0,67.0,18.0,...,1.0,95.0,13.0,31.0,4.0,3.0,1336.0,27.0,173.0,0.903247
4,carmina fragmenta,125,0,0,0,0,0.0,0.0,0.0,0.0,...,0.0,8.0,54.0,3.0,3.0,0.0,56.0,1.0,69.0,0.456


In [63]:
speeches = [
       'de domo sua', 'de haruspicum responso', 'de lege agraria 1',
       'de lege agraria 2', 'de lege agraria 3',
       'de provinciis consularibus', 'in caecilium', 'in catilinam 1',
       'in catilinam 2', 'in catilinam 3', 'in catilinam 4', 'in pisonem',
       'in vatinium', 'in verrem 1', 'in verrem 2 1', 'in verrem 2 2',
       'in verrem 2 3', 'in verrem 2 4', 'in verrem 2 5', 'philippicae 1',
       'philippicae 10', 'philippicae 11', 'philippicae 12',
       'philippicae 13', 'philippicae 14', 'philippicae 2',
       'philippicae 3', 'philippicae 4', 'philippicae 5', 'philippicae 6',
       'philippicae 7', 'philippicae 8', 'philippicae 9',
       'post reditum ad populum', 'post reditum in senatu', 'pro archia',
       'pro balbo', 'pro caecina', 'pro caelio', 'pro cluentio',
       'pro flacco', 'pro fonteio', 'pro lege manilia', 'pro ligario',
       'pro marcello', 'pro milone', 'pro murena', 'pro plancio',
       'pro quinctio', 'pro rabirio perduellionis reo',
       'pro rabirio postumo', 'pro rege deiotaro', 'pro roscio amerino',
       'pro roscio comoedo', 'pro scauro', 'pro sestio', 'pro sulla',
       'pro tullio'
]
cicero_speeches = cicero_df.loc[cicero_df['title'].isin(speeches)]
print('Number of speeches: ', len(cicero_speeches))

Number of speeches:  58


In [64]:
cicero_speeches['title']

8                        de domo sua
11            de haruspicum responso
15                 de lege agraria 1
16                 de lege agraria 2
17                 de lege agraria 3
24        de provinciis consularibus
54                      in caecilium
56                    in catilinam 1
57                    in catilinam 2
58                    in catilinam 3
59                    in catilinam 4
60                        in pisonem
62                       in vatinium
64                       in verrem 1
65                     in verrem 2 1
66                     in verrem 2 2
67                     in verrem 2 3
68                     in verrem 2 4
69                     in verrem 2 5
78                     philippicae 1
79                    philippicae 10
80                    philippicae 11
81                    philippicae 12
82                    philippicae 13
83                    philippicae 14
84                     philippicae 2
85                     philippicae 3
8

In [65]:
contingency_table = cicero_speeches[['title', 'total_artistic', 'total_unartistic']]

In [66]:
total_artistic = contingency_table['total_artistic'].sum()
total_unartistic = contingency_table['total_unartistic'].sum()
print(total_artistic)
print(total_unartistic)

20864.0
4147.0


In [67]:
def chi_square_statistic(artistic, unartistic, p_value=False):
    obs = np.array([
        [artistic, unartistic],
        [total_artistic-artistic, total_unartistic-unartistic]
    ])
    chi2, p, _, _ = stats.chi2_contingency(obs, correction=False)
    
    if p_value:
        return round(p, 5)
    
    return chi2

In [68]:
contingency_table['chi2'] = contingency_table.apply(
    lambda row: chi_square_statistic(row['total_artistic'], row['total_unartistic']),
    axis=1
)
contingency_table['p'] = contingency_table.apply(
    lambda row: chi_square_statistic(row['total_artistic'], row['total_unartistic'], p_value=True),
    axis=1
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  import sys


In [69]:
print('Total Artistic: ', total_artistic)
print('Total Unartistic: ', total_unartistic)

Total Artistic:  20864.0
Total Unartistic:  4147.0


In [70]:
contingency_table

Unnamed: 0,title,total_artistic,total_unartistic,chi2,p
8,de domo sua,643.0,93.0,8.531744,0.00349
11,de haruspicum responso,346.0,55.0,2.418475,0.11991
15,de lege agraria 1,121.0,21.0,0.331554,0.56475
16,de lege agraria 2,470.0,90.0,0.107414,0.74311
17,de lege agraria 3,68.0,16.0,0.370834,0.54255
24,de provinciis consularibus,230.0,24.0,9.436392,0.00213
54,in caecilium,272.0,44.0,1.633088,0.20128
56,in catilinam 1,164.0,26.0,1.161291,0.2812
57,in catilinam 2,164.0,15.0,8.766315,0.00307
58,in catilinam 3,128.0,18.0,1.919551,0.16591


# Tacitus Chi Square Results

In [51]:
tacitus_df = pd.read_csv('../data/tacitus_df.csv')
exclude = ['annales', 'historiae']
tacitus_df = tacitus_df.loc[~tacitus_df['title'].isin(exclude)]

In [52]:
tacitus_df

Unnamed: 0,title,total_clausulae,total_excluded,abbrev_excluded,bracket_excluded,short_excluded,cretic-trochee (-u--x),cretic-trochee 1 res (uuu--x),cretic-trochee 1 res (-uuu-x),cretic-trochee 1 res (-u-uux),...,hypodochmiac 1 res (-uuuux),spondaic (---x),heroic (-uu-x),first paeon (-uux),choriamb trochee (-uu--x),short sequence (uuuuux),total_artistic,misc_clausulae,total_unartistic,percent_clausulae
0,agricola,443,14,0,14,0,45.0,2.0,9.0,14.0,...,2.0,94.0,21.0,34.0,16.0,2.0,258.0,4.0,171.0,0.610723
2,annales book 1,769,16,11,5,0,66.0,10.0,10.0,21.0,...,6.0,176.0,49.0,69.0,23.0,0.0,429.0,7.0,324.0,0.579017
3,annales book 11,318,8,2,5,1,25.0,5.0,5.0,11.0,...,7.0,73.0,18.0,24.0,7.0,0.0,187.0,1.0,123.0,0.606452
4,annales book 12,473,5,3,2,0,45.0,8.0,15.0,20.0,...,3.0,111.0,25.0,42.0,16.0,1.0,267.0,6.0,201.0,0.583333
5,annales book 13,443,4,1,2,1,42.0,11.0,5.0,5.0,...,6.0,113.0,31.0,37.0,13.0,3.0,240.0,2.0,199.0,0.551253
6,annales book 14,528,11,1,6,2,47.0,6.0,12.0,22.0,...,5.0,115.0,36.0,40.0,21.0,1.0,295.0,9.0,222.0,0.588008
7,annales book 15,560,11,3,5,0,62.0,8.0,11.0,14.0,...,7.0,129.0,36.0,49.0,18.0,2.0,311.0,4.0,238.0,0.57377
8,annales book 16,237,2,2,1,0,22.0,7.0,3.0,11.0,...,3.0,48.0,14.0,18.0,8.0,1.0,141.0,5.0,94.0,0.621277
9,annales book 2,685,8,5,2,1,49.0,5.0,14.0,22.0,...,7.0,175.0,38.0,68.0,25.0,5.0,355.0,11.0,322.0,0.54062
10,annales book 3,605,15,8,2,1,61.0,14.0,9.0,17.0,...,6.0,125.0,49.0,59.0,26.0,2.0,324.0,5.0,266.0,0.557627


In [57]:
tacitus_contingency = tacitus_df[['title', 'total_artistic', 'total_unartistic']]

In [58]:
total_artistic = tacitus_contingency['total_artistic'].sum()
total_unartistic = tacitus_contingency['total_unartistic'].sum()
print(total_artistic)
print(total_unartistic)

6135.0
4510.0


In [59]:
tacitus_contingency['chi2'] = tacitus_contingency.apply(
    lambda row: chi_square_statistic(row['total_artistic'], row['total_unartistic']),
    axis=1
)
tacitus_contingency['p'] = tacitus_contingency.apply(
    lambda row: chi_square_statistic(row['total_artistic'], row['total_unartistic'], p_value=True),
    axis=1
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  import sys


In [60]:
tacitus_contingency

Unnamed: 0,title,total_artistic,total_unartistic,chi2,p
0,agricola,258.0,171.0,1.150772,0.28339
2,annales book 1,429.0,324.0,0.144813,0.70354
3,annales book 11,187.0,123.0,0.946163,0.3307
4,annales book 12,267.0,201.0,0.06777,0.79461
5,annales book 13,240.0,199.0,1.646321,0.19946
6,annales book 14,295.0,222.0,0.072998,0.78702
7,annales book 15,311.0,238.0,0.229652,0.63178
8,annales book 16,141.0,94.0,0.551535,0.45769
9,annales book 2,355.0,322.0,7.992377,0.0047
10,annales book 3,324.0,266.0,1.889013,0.16931


# Pliny Chi Square Results

In [82]:
pliny_df = pd.read_csv('../data/younger_pliny_df.csv')
exclude = ['epistulae', 'versus', 'epistulae 10']
pliny_df = pliny_df.loc[~pliny_df['title'].isin(exclude)]
pliny_df

Unnamed: 0,title,total_clausulae,total_excluded,abbrev_excluded,bracket_excluded,short_excluded,cretic-trochee (-u--x),cretic-trochee 1 res (uuu--x),cretic-trochee 1 res (-uuu-x),cretic-trochee 1 res (-u-uux),...,hypodochmiac 1 res (-uuuux),spondaic (---x),heroic (-uu-x),first paeon (-uux),choriamb trochee (-uu--x),short sequence (uuuuux),total_artistic,misc_clausulae,total_unartistic,percent_clausulae
1,epistulae 1,533,42,4,0,35,122.0,13.0,14.0,42.0,...,4.0,24.0,13.0,8.0,5.0,3.0,420.0,18.0,71.0,0.892057
3,epistulae 10 pliny,415,18,1,9,1,83.0,18.0,11.0,32.0,...,3.0,35.0,11.0,17.0,7.0,2.0,320.0,5.0,77.0,0.81864
4,epistulae 10 trajan,153,2,0,2,0,22.0,6.0,4.0,7.0,...,0.0,25.0,6.0,14.0,3.0,0.0,102.0,1.0,49.0,0.682119
5,epistulae 2,512,32,2,0,30,131.0,8.0,18.0,41.0,...,1.0,30.0,10.0,10.0,2.0,3.0,413.0,12.0,67.0,0.885417
6,epistulae 3,533,28,1,0,25,117.0,19.0,13.0,49.0,...,2.0,41.0,7.0,11.0,15.0,1.0,422.0,8.0,83.0,0.851485
7,epistulae 4,531,51,4,0,46,115.0,12.0,21.0,48.0,...,1.0,28.0,9.0,12.0,8.0,3.0,409.0,11.0,71.0,0.875
8,epistulae 5,499,33,5,1,27,114.0,10.0,17.0,48.0,...,4.0,28.0,9.0,6.0,8.0,2.0,406.0,7.0,60.0,0.886266
9,epistulae 6,612,54,5,1,42,145.0,19.0,16.0,43.0,...,4.0,32.0,13.0,15.0,11.0,2.0,474.0,11.0,84.0,0.869176
10,epistulae 7,537,49,3,0,45,120.0,13.0,21.0,54.0,...,1.0,36.0,13.0,13.0,5.0,1.0,410.0,10.0,78.0,0.860656
11,epistulae 8,446,33,2,2,29,115.0,11.0,18.0,40.0,...,0.0,19.0,12.0,10.0,9.0,1.0,357.0,5.0,56.0,0.876513


In [83]:
pliny_contingency = pliny_df[['title', 'total_artistic', 'total_unartistic']]
total_artistic = pliny_contingency['total_artistic'].sum()
total_unartistic = pliny_contingency['total_unartistic'].sum()
print(total_artistic)
print(total_unartistic)
pliny_contingency['chi2'] = pliny_contingency.apply(
    lambda row: chi_square_statistic(row['total_artistic'], row['total_unartistic']),
    axis=1
)
pliny_contingency['p'] = pliny_contingency.apply(
    lambda row: chi_square_statistic(row['total_artistic'], row['total_unartistic'], p_value=True),
    axis=1
)
pliny_contingency

5209.0
972.0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if sys.path[0] == '':


Unnamed: 0,title,total_artistic,total_unartistic,chi2,p
1,epistulae 1,420.0,71.0,0.644361,0.42214
3,epistulae 10 pliny,320.0,77.0,4.311374,0.03786
4,epistulae 10 trajan,102.0,49.0,32.668784,0.0
5,epistulae 2,413.0,67.0,1.226465,0.2681
6,epistulae 3,422.0,83.0,0.2092,0.6474
7,epistulae 4,409.0,71.0,0.342521,0.55838
8,epistulae 5,406.0,60.0,3.08914,0.07882
9,epistulae 6,474.0,84.0,0.208912,0.64762
10,epistulae 7,410.0,78.0,0.026611,0.87042
11,epistulae 8,357.0,56.0,1.567159,0.21062


In [84]:
pliny_df = pd.read_csv('../data/younger_pliny_df.csv')
exclude = ['epistulae', 'versus', 'epistulae 10 pliny', 'epistulae 10 trajan', 'panegyricus', 'epistulae 10']
pliny_df = pliny_df.loc[~pliny_df['title'].isin(exclude)]
pliny_df

Unnamed: 0,title,total_clausulae,total_excluded,abbrev_excluded,bracket_excluded,short_excluded,cretic-trochee (-u--x),cretic-trochee 1 res (uuu--x),cretic-trochee 1 res (-uuu-x),cretic-trochee 1 res (-u-uux),...,hypodochmiac 1 res (-uuuux),spondaic (---x),heroic (-uu-x),first paeon (-uux),choriamb trochee (-uu--x),short sequence (uuuuux),total_artistic,misc_clausulae,total_unartistic,percent_clausulae
1,epistulae 1,533,42,4,0,35,122.0,13.0,14.0,42.0,...,4.0,24.0,13.0,8.0,5.0,3.0,420.0,18.0,71.0,0.892057
5,epistulae 2,512,32,2,0,30,131.0,8.0,18.0,41.0,...,1.0,30.0,10.0,10.0,2.0,3.0,413.0,12.0,67.0,0.885417
6,epistulae 3,533,28,1,0,25,117.0,19.0,13.0,49.0,...,2.0,41.0,7.0,11.0,15.0,1.0,422.0,8.0,83.0,0.851485
7,epistulae 4,531,51,4,0,46,115.0,12.0,21.0,48.0,...,1.0,28.0,9.0,12.0,8.0,3.0,409.0,11.0,71.0,0.875
8,epistulae 5,499,33,5,1,27,114.0,10.0,17.0,48.0,...,4.0,28.0,9.0,6.0,8.0,2.0,406.0,7.0,60.0,0.886266
9,epistulae 6,612,54,5,1,42,145.0,19.0,16.0,43.0,...,4.0,32.0,13.0,15.0,11.0,2.0,474.0,11.0,84.0,0.869176
10,epistulae 7,537,49,3,0,45,120.0,13.0,21.0,54.0,...,1.0,36.0,13.0,13.0,5.0,1.0,410.0,10.0,78.0,0.860656
11,epistulae 8,446,33,2,2,29,115.0,11.0,18.0,40.0,...,0.0,19.0,12.0,10.0,9.0,1.0,357.0,5.0,56.0,0.876513
12,epistulae 9,564,69,7,3,50,117.0,18.0,19.0,46.0,...,0.0,29.0,14.0,16.0,7.0,13.0,403.0,13.0,92.0,0.840404


In [85]:
pliny_contingency = pliny_df[['title', 'total_artistic', 'total_unartistic']]
total_artistic = pliny_contingency['total_artistic'].sum()
total_unartistic = pliny_contingency['total_unartistic'].sum()
print(total_artistic)
print(total_unartistic)
pliny_contingency['chi2'] = pliny_contingency.apply(
    lambda row: chi_square_statistic(row['total_artistic'], row['total_unartistic']),
    axis=1
)
pliny_contingency['p'] = pliny_contingency.apply(
    lambda row: chi_square_statistic(row['total_artistic'], row['total_unartistic'], p_value=True),
    axis=1
)
pliny_contingency

3714.0
662.0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if sys.path[0] == '':


Unnamed: 0,title,total_artistic,total_unartistic,chi2,p
1,epistulae 1,420.0,71.0,0.192029,0.66123
5,epistulae 2,413.0,67.0,0.574456,0.44849
6,epistulae 3,422.0,83.0,0.760324,0.38323
7,epistulae 4,409.0,71.0,0.047492,0.82749
8,epistulae 5,406.0,60.0,2.060846,0.15113
9,epistulae 6,474.0,84.0,0.002743,0.95823
10,epistulae 7,410.0,78.0,0.313187,0.57573
11,epistulae 8,357.0,56.0,0.873996,0.34985
12,epistulae 9,403.0,92.0,5.19775,0.02262


# Varro Chi Square Results

In [89]:
varro_df = pd.read_csv('../data/varro_df.csv')
include = [
    'de lingua latina book 5',
    'de lingua latina book 6',
    'de lingua latina book 7',
    'de lingua latina book 8',
    'de lingua latina book 9',
    'res rustica book 1',
    'res rustica book 2',
    'res rustica book 3'
]
varro_df = varro_df.loc[varro_df['title'].isin(include)]
varro_df

Unnamed: 0,title,total_clausulae,total_excluded,abbrev_excluded,bracket_excluded,short_excluded,cretic-trochee (-u--x),cretic-trochee 1 res (uuu--x),cretic-trochee 1 res (-uuu-x),cretic-trochee 1 res (-u-uux),...,hypodochmiac 1 res (-uuuux),spondaic (---x),heroic (-uu-x),first paeon (-uux),choriamb trochee (-uu--x),short sequence (uuuuux),total_artistic,misc_clausulae,total_unartistic,percent_clausulae
7,de lingua latina book 5,986,231,0,222,7,71.0,19.0,13.0,19.0,...,12.0,192.0,44.0,81.0,36.0,3.0,388.0,11.0,367.0,0.528477
8,de lingua latina book 6,515,132,2,125,5,43.0,5.0,4.0,10.0,...,4.0,104.0,17.0,44.0,18.0,2.0,189.0,9.0,194.0,0.516971
9,de lingua latina book 7,617,201,1,161,25,34.0,5.0,9.0,10.0,...,4.0,74.0,32.0,44.0,16.0,3.0,208.0,39.0,208.0,0.59375
10,de lingua latina book 8,285,96,0,95,1,13.0,2.0,7.0,6.0,...,10.0,46.0,6.0,17.0,5.0,8.0,104.0,3.0,85.0,0.566138
11,de lingua latina book 9,672,174,1,168,6,51.0,11.0,14.0,22.0,...,22.0,113.0,26.0,50.0,21.0,15.0,268.0,5.0,230.0,0.548193
21,res rustica book 1,864,150,3,132,10,54.0,23.0,30.0,27.0,...,14.0,184.0,51.0,73.0,26.0,5.0,372.0,3.0,342.0,0.52521
22,res rustica book 2,702,122,0,118,3,57.0,11.0,9.0,12.0,...,8.0,156.0,32.0,74.0,24.0,5.0,283.0,6.0,297.0,0.498276
23,res rustica book 3,585,124,3,116,5,50.0,13.0,9.0,16.0,...,6.0,143.0,27.0,45.0,17.0,3.0,223.0,3.0,238.0,0.490239


In [90]:
varro_contingency = varro_df[['title', 'total_artistic', 'total_unartistic']]
total_artistic = varro_contingency['total_artistic'].sum()
total_unartistic = varro_contingency['total_unartistic'].sum()
print(total_artistic)
print(total_unartistic)
varro_contingency['chi2'] = varro_contingency.apply(
    lambda row: chi_square_statistic(row['total_artistic'], row['total_unartistic']),
    axis=1
)
varro_contingency['p'] = varro_contingency.apply(
    lambda row: chi_square_statistic(row['total_artistic'], row['total_unartistic'], p_value=True),
    axis=1
)
varro_contingency

2035.0
1961.0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if sys.path[0] == '':


Unnamed: 0,title,total_artistic,total_unartistic,chi2,p
7,de lingua latina book 5,388.0,367.0,0.080471,0.77666
8,de lingua latina book 6,189.0,194.0,0.422422,0.51573
9,de lingua latina book 7,208.0,208.0,0.159293,0.68981
10,de lingua latina book 8,104.0,85.0,1.334729,0.24797
11,de lingua latina book 9,268.0,230.0,1.900377,0.16804
21,res rustica book 1,372.0,342.0,0.480183,0.48834
22,res rustica book 2,283.0,297.0,1.234963,0.26644
23,res rustica book 3,223.0,238.0,1.358901,0.24373


# Caesar Chi Square Results

In [95]:
caesar_df = pd.read_csv('../data/caesar_df.csv')
include = [
    'bellum civile book 1',
    'bellum civile book 2',
    'bellum civile book 3',
    'de bello gallico book 1',
    'de bello gallico book 2',
    'de bello gallico book 3',
    'de bello gallico book 4',
    'de bello gallico book 5',
    'de bello gallico book 6',
    'de bello gallico book 7'
]
caesar_df = caesar_df.loc[caesar_df['title'].isin(include)]
caesar_df

Unnamed: 0,title,total_clausulae,total_excluded,abbrev_excluded,bracket_excluded,short_excluded,cretic-trochee (-u--x),cretic-trochee 1 res (uuu--x),cretic-trochee 1 res (-uuu-x),cretic-trochee 1 res (-u-uux),...,hypodochmiac 1 res (-uuuux),spondaic (---x),heroic (-uu-x),first paeon (-uux),choriamb trochee (-uu--x),short sequence (uuuuux),total_artistic,misc_clausulae,total_unartistic,percent_clausulae
2,bellum civile book 1,705,22,10,10,2,107.0,15.0,16.0,18.0,...,3.0,120.0,23.0,51.0,23.0,1.0,462.0,3.0,221.0,0.68082
3,bellum civile book 2,382,8,5,3,0,58.0,11.0,11.0,5.0,...,3.0,74.0,19.0,30.0,5.0,2.0,240.0,4.0,134.0,0.652406
4,bellum civile book 3,895,162,6,23,0,96.0,24.0,22.0,12.0,...,3.0,157.0,40.0,39.0,20.0,6.0,463.0,8.0,270.0,0.642565
8,de bello gallico book 1,464,4,3,0,1,50.0,7.0,14.0,8.0,...,5.0,105.0,29.0,29.0,16.0,2.0,277.0,2.0,183.0,0.606522
9,de bello gallico book 2,192,3,1,2,0,28.0,3.0,7.0,3.0,...,2.0,53.0,7.0,8.0,4.0,1.0,116.0,0.0,73.0,0.613757
10,de bello gallico book 3,156,2,1,1,0,18.0,1.0,7.0,2.0,...,1.0,43.0,7.0,5.0,3.0,0.0,95.0,1.0,59.0,0.623377
11,de bello gallico book 4,219,2,0,2,0,33.0,2.0,6.0,5.0,...,0.0,50.0,13.0,11.0,11.0,1.0,127.0,4.0,90.0,0.603687
12,de bello gallico book 5,437,4,3,1,0,75.0,10.0,8.0,15.0,...,2.0,89.0,12.0,32.0,5.0,2.0,289.0,4.0,144.0,0.676674
13,de bello gallico book 6,324,0,0,0,0,55.0,5.0,8.0,8.0,...,1.0,52.0,7.0,20.0,7.0,2.0,233.0,3.0,91.0,0.728395
14,de bello gallico book 7,692,8,3,4,1,82.0,15.0,22.0,19.0,...,2.0,151.0,23.0,49.0,21.0,6.0,425.0,9.0,259.0,0.634503


In [96]:
caesar_contingency = caesar_df[['title', 'total_artistic', 'total_unartistic']]
total_artistic = caesar_contingency['total_artistic'].sum()
total_unartistic = caesar_contingency['total_unartistic'].sum()
print(total_artistic)
print(total_unartistic)
caesar_contingency['chi2'] = caesar_contingency.apply(
    lambda row: chi_square_statistic(row['total_artistic'], row['total_unartistic']),
    axis=1
)
caesar_contingency['p'] = caesar_contingency.apply(
    lambda row: chi_square_statistic(row['total_artistic'], row['total_unartistic'], p_value=True),
    axis=1
)
caesar_contingency

2727.0
1524.0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if sys.path[0] == '':


Unnamed: 0,title,total_artistic,total_unartistic,chi2,p
2,bellum civile book 1,462.0,221.0,4.317489,0.03772
3,bellum civile book 2,240.0,134.0,8.3e-05,0.99275
4,bellum civile book 3,463.0,270.0,0.373315,0.5412
8,de bello gallico book 1,277.0,183.0,3.468031,0.06257
9,de bello gallico book 2,116.0,73.0,0.661794,0.41593
10,de bello gallico book 3,95.0,59.0,0.420908,0.51648
11,de bello gallico book 4,127.0,90.0,3.145276,0.07615
12,de bello gallico book 5,289.0,144.0,1.410612,0.23495
13,de bello gallico book 6,233.0,91.0,9.192949,0.00243
14,de bello gallico book 7,425.0,259.0,1.439306,0.23025
