# A Gospel in the Language of Lies
### Interactive Figures and Data Appendix

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt
import os

In [None]:
# Read the data set. Only empty cells are treated as missing
# (keep_default_na=False), presumably so that the literal tag 'None'
# used by the deception columns survives as a string -- TODO confirm.
file_name = os.path.join('data', 'leave_one_out.csv')
df = pd.read_csv(file_name, na_values=[''], keep_default_na=False)

# relabel lines of unknown source: the placeholder '0' becomes missing.
# np.nan is spelled in lower case because the np.NaN alias was removed
# in NumPy 2.0.
df['hom_work'] = df['hom_work'].replace('0', np.nan)
df['hom_spkr_being'] = df['hom_spkr_being'].replace('0', np.nan)

# make deception tags ordered categorical values; both columns share the
# same scale, so the level list is defined once.
dec_levels = ['unclassified', 'None', 'Possible', 'Strong']
df['dec_line'] = pd.Categorical(
    df['dec_line'], categories=dec_levels, ordered=True)
df['dec_speech'] = pd.Categorical(
    df['dec_speech'], categories=dec_levels, ordered=True)

## Figures

In [None]:
sns.set_style('white')

# First four 'hls' colours in reverse order. This is materialised as a
# list: reversed() would return a one-shot iterator that is exhausted
# after the first plot that consumes it, leaving later plots without
# colours.
pal = sns.color_palette('hls')[:4][::-1]

### Overview

In [None]:
# Sum 'coefficient.1' within each 'hom_work' group and show the totals
# as a pie chart, one wedge per group.
x = df.groupby('hom_work')['coefficient.1'].sum().to_frame('lines')

fig, ax = plt.subplots()
ax.pie(x['lines'], labels=x.index)
plt.show()

In [None]:
# simplified list of characters
# Work on a copy: assigning through a bare reference to the column would
# write into df['hom_spkr_being'] itself (or warn, depending on pandas'
# copy-on-write mode).
spkrs = df['hom_spkr_being'].copy()

# Every row whose 'hom_spkr' label starts with one of these names is
# grouped under the bare name. na=False treats a missing 'hom_spkr' as
# "no match" so the boolean mask never contains NaN.
for name in ['Odysseus', 'Achilles', 'Telemachus', 'Penelope']:
    spkrs.loc[df['hom_spkr'].str.startswith(name, na=False)] = name

# Total 'coefficient.1' per simplified speaker label.
x = df.groupby(spkrs).agg(
    lines = ('coefficient.1', 'sum'),
)

fig, ax = plt.subplots()
ax.pie(x['lines'], labels=x.index)

plt.show()

In [None]:
# Show the 'hom_spkr' column for the rows whose label contains a hyphen.
_has_hyphen = df['hom_spkr'].str.contains('-')
df.loc[_has_hyphen, ['hom_spkr']]

In [None]:
# Boolean flag per row: does the 'hom_spkr' label contain a hyphen?
# (presumably the hyphen marks a character speaking in disguise)
is_disg = df['hom_spkr'].str.contains('-')

# Total 'coefficient.1' for flagged versus unflagged rows.
x = df.groupby(is_disg)['coefficient.1'].sum().to_frame('lines')

fig, ax = plt.subplots()
ax.pie(x['lines'], labels=x.index)
ax.set(title='lines spoken in disguise')
plt.show()

In [None]:
# Restrict to the rows flagged by is_disg, then total 'coefficient.1'
# for each distinct 'hom_spkr' label, ordered by label.
_disguised = df[is_disg]
x = _disguised.groupby('hom_spkr')['coefficient.1'].sum().to_frame('lines')
x = x.sort_index()

fig, ax = plt.subplots()
ax.pie(x['lines'], labels=x.index)
ax.set(title='lines spoken in disguise')
plt.show()

In [None]:
# One summary row per ('eud_line', 'segment') pair. Descriptive columns
# take the first value found in the group; the two deception tags take
# the group maximum, which follows their ordered categorical scale.
_segment_summary = {
    'eud_seq': ('eud_seq', 'first'),
    'n_segs': ('line-with-segments', 'first'),
    'eud_spkr': ('eud_spkr', 'first'),
    'eud_addr': ('eud_addr_all', 'first'),
    'deceptive_line': ('dec_line', 'max'),
    'deceptive_speech': ('dec_speech', 'max'),
}
df.groupby(['eud_line', 'segment']).agg(**_segment_summary)

In [None]:
# Rows whose 'eud_seq' equals 1.
x = df.loc[df['eud_seq'] == 1]

# The speech-level tag is stored in df as 'dec_speech'; the name
# 'deceptive_speech' is only produced as an output label of the
# per-segment aggregation, whose result is not assigned back to df.
# Use 'deceptive_speech' if the data really has it, otherwise fall back
# to 'dec_speech' instead of failing with a KeyError.
_speech_col = ('deceptive_speech' if 'deceptive_speech' in x.columns
               else 'dec_speech')

# Row-normalised cross-tabulation of line against tag, summed over lines
# and drawn as a pie chart.
pd.crosstab(x['eud_line'], x[_speech_col], normalize='index').sum().plot.pie()
    
        

In [None]:
def plotEudociaSpeech(seq, filename=None):
    """Draw a swarm plot for the rows of ``df`` whose 'eud_seq' equals *seq*.

    'eud_line' is placed on the x-axis and 'hom_spkr' on the y-axis; points
    are coloured by 'dec_line' using the module-level palette ``pal``.

    Args:
        seq: value compared against the 'eud_seq' column to select rows.
        filename: when not None, the current figure is saved to this path
            before it is shown.
    """
    speech_rows = df.loc[df['eud_seq'] == seq]
    plot_options = dict(
        x='eud_line',
        y='hom_spkr',
        hue='dec_line',
        kind='swarm',
        palette=pal,
        aspect=1.4,
    )
    sns.catplot(data=speech_rows, **plot_options)

    if filename is not None:
        plt.savefig(filename)
    plt.show()

In [None]:
# Plot the rows with eud_seq == 83; nothing is saved to disk.
plotEudociaSpeech(seq=83)

In [None]:
# Strip plot over every row of df: 'eud_line' against 'eud_spkr', coloured
# by 'dec_speech'. Palette and aspect are left at seaborn's defaults and
# the figure is not written to disk.
_strip_options = dict(
    x='eud_line',
    y='eud_spkr',
    hue='dec_speech',
    kind='strip',
)
sns.catplot(data=df, **_strip_options)

In [None]:
# Speakers ('eud_spkr') that have at least one row tagged 'Strong' in
# 'dec_speech'.
_is_strong = df['dec_speech'].isin(['Strong'])
valid_names = df.loc[_is_strong, 'eud_spkr'].unique()

# Same strip plot as for the full data, restricted to those speakers and
# drawn wider (aspect 2.5); the palette stays at seaborn's default.
_by_valid_speaker = df['eud_spkr'].isin(valid_names)
sns.catplot(
    data=df.loc[_by_valid_speaker],
    x='eud_line',
    y='eud_spkr',
    hue='dec_speech',
    kind='strip',
    aspect=2.5,
)