## Preliminaries

### imports

In [1]:
from dicesapi import DicesAPI
import pandas as pd

### global variables

In [2]:
dices_server = 'http://localhost:8000/api'
input_file = '/Users/chris/Dropbox/DICES - Research Assistants/Marie Kisser/all_data-1_0b.xlsx'
sheet_name = 'Homerocentones_linesfromHomer'

### DICES connection

In [3]:
api = DicesAPI(dices_api=dices_server, logfile='dices.log')

### function definitions

In [4]:
def gender_all(insts):
    '''returns a simple gender label for a group of instances'''
    genders = '-'.join(sorted(set([inst.gender for inst in insts])))
    if len(genders) == 0:
        return None    
    
    if genders == 'male':
        return 'male'
    if genders == 'female':
        return 'female'
    
    return 'other'
    
def gender_first(insts):
    '''returns a gender label for the first instance of a group'''
    genders = [inst.gender for inst in insts]
    if len(genders) == 0:
        return None
        
    if genders[0] == 'male':
        return 'male'
    if genders[0] == 'female':
        return 'female'
    
    return 'other'

In [5]:
def being_all(insts):
    '''returns a simple being label for a group of instances'''
    beings = '-'.join(sorted(set([inst.being for inst in insts])))
    if len(beings) == 0:
        return None
    
    if beings == 'mortal':
        return 'mortal'
    if beings == 'divine':
        return 'divine'
    
    return 'other'
    
def being_first(insts):
    '''returns a being label for the first instance of a group'''
    beings = [inst.being for inst in insts]
    if len(beings) == 0:
        return None    
    
    if beings[0] == 'moral':
        return 'mortal'
    if beings[0] == 'divine':
        return 'divine'
    
    return 'other'

In [6]:
def speech_to_lines(speech):
    '''turn a speech into a pandas dataframe with one line per row'''
    book = int(speech.l_fi.split('.')[0])
    l_first = int(speech.l_fi.split('.')[1])
    l_last = int(speech.l_la.split('.')[1])
    
    df = pd.DataFrame(dict(
        work = speech.work.urn,
        book = book,
        line = line,
        spkr_first = speech.spkr[0].name,
        spkr_all = speech.getSpkrString(),
        spkr_gender_first = gender_first(speech.spkr),
        spkr_gender_all = gender_all(speech.spkr),
        spkr_being_first = being_first(speech.spkr),
        spkr_being_all = being_all(speech.spkr),
        addr_first = speech.addr[0].name,
        addr_all = speech.getAddrString(),
        addr_gender_first = gender_first(speech.addr),
        addr_gender_all = gender_all(speech.addr),
        addr_being_first = being_first(speech.addr),
        addr_being_all = being_all(speech.addr),
    ) for line in range(l_first, l_last+1))
    
    return df

## Data

### Homer speech data

In [7]:
hom_speeches = sorted(api.getSpeeches(work_title='Iliad') + api.getSpeeches(work_title='Odyssey'))
hom_df = pd.concat([speech_to_lines(s) for s in hom_speeches])
hom_df = hom_df.rename(columns={
            'work':'hom_work',
            'book':'hom_book',
            'line':'hom_line',
            'spkr_first':'hom_spkr_first',
            'spkr_all':'hom_spkr_all',    
            'spkr_gender_first':'hom_spkr_gender_first',
            'spkr_gender_all':'hom_spkr_gender_all',
            'spkr_being_first':'hom_spkr_being_first',
            'spkr_being_all':'hom_spkr_being_all',
            'addr_first':'hom_addr_first',
            'addr_all':'hom_addr_all',    
            'addr_gender_first':'hom_addr_gender_first',
            'addr_gender_all':'hom_addr_gender_all',
            'addr_being_first':'hom_addr_being_first',
            'addr_being_all':'hom_addr_being_all',
})

### Eudocia speech data

In [8]:
eud_speeches = api.getSpeeches(work_title='Homerocentones')
eud_df = pd.DataFrame(dict(
    eud_seq = s.seq,
    eud_spkr_first = s.spkr[0].name if len(s.spkr) > 0 else None,
    eud_spkr_all = s.getSpkrString(),
    eud_spkr_gender_first = gender_first(s.spkr),
    eud_spkr_gender_all = gender_all(s.spkr),
    eud_spkr_being_first = being_first(s.spkr),
    eud_spkr_being_all = being_all(s.spkr),
    eud_addr_first = s.addr[0].name if len(s.addr) > 0 else None,
    eud_addr_all = s.getAddrString(),
    eud_addr_gender_first = gender_first(s.addr),
    eud_addr_gender_all = gender_all(s.addr),
    eud_addr_being_first = being_first(s.addr),
    eud_addr_being_all = being_all(s.addr),
) for s in eud_speeches)

### source lines for cento

In [9]:
cento_df = pd.read_excel(input_file, 
            sheet_name=sheet_name, 
            header=1,
            usecols=[0,1,2,3,4],
            keep_default_na=False,
            )
cento_df = cento_df.rename(columns={
    'line':'eud_line',
    'work':'hom_work',
    'book':'hom_book',
    'line.1':'hom_line', 
    'seq':'eud_seq',

})
cento_df.loc[cento_df.hom_work=='Il.','hom_work'] = 'urn:cts:greekLit:tlg0012.tlg001.perseus-grc2'
cento_df.loc[cento_df.hom_work=='Od.','hom_work'] = 'urn:cts:greekLit:tlg0012.tlg002.perseus-grc2'

### add speaker, addressee info from eud_speeches

In [10]:
df = pd.merge(cento_df, eud_df, how='left', on='eud_seq')

### add speaker, addressee info from hom_speeches

In [11]:
joint_df=pd.merge(df, hom_df, how='left', on=['hom_work', 'hom_book', 'hom_line'])

In [12]:
hom_df.to_csv('homer.csv')
eud_df.to_csv('eudocia.csv')
joint_df.to_csv('output.csv')

In [14]:
joint_df.loc[joint_df.eud_spkr_all=='Jesus']

Unnamed: 0,eud_seq,eud_line,hom_work,hom_book,hom_line,eud_spkr_first,eud_spkr_all,eud_spkr_gender_first,eud_spkr_gender_all,eud_spkr_being_first,...,hom_spkr_gender_first,hom_spkr_gender_all,hom_spkr_being_first,hom_spkr_being_all,hom_addr_first,hom_addr_all,hom_addr_gender_first,hom_addr_gender_all,hom_addr_being_first,hom_addr_being_all
106,3,178,urn:cts:greekLit:tlg0012.tlg001.perseus-grc2,14,212,Jesus,Jesus,male,male,other,...,female,female,divine,divine,Hera,Hera,female,female,divine,divine
107,3,179,urn:cts:greekLit:tlg0012.tlg002.perseus-grc2,7,312,Jesus,Jesus,male,male,other,...,male,male,other,mortal,Odysseus,Odysseus,male,male,other,mortal
108,3,180,urn:cts:greekLit:tlg0012.tlg001.perseus-grc2,22,235,Jesus,Jesus,male,male,other,...,male,male,other,mortal,Athena,Athena,female,female,divine,divine
109,3,181,urn:cts:greekLit:tlg0012.tlg002.perseus-grc2,16,17,Jesus,Jesus,male,male,other,...,,,,,,,,,,
110,3,182,urn:cts:greekLit:tlg0012.tlg002.perseus-grc2,16,136,Jesus,Jesus,male,male,other,...,male,male,other,mortal,Telemachus,Telemachus,male,male,other,mortal
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1053,90,2317,urn:cts:greekLit:tlg0012.tlg001.perseus-grc2,9,492,Jesus,Jesus,male,male,other,...,male,male,other,mortal,Achilles,Achilles,male,male,other,mortal
1063,92,2341,urn:cts:greekLit:tlg0012.tlg001.perseus-grc2,1,56,Jesus,Jesus,male,male,other,...,,,,,,,,,,
1064,92,2342,urn:cts:greekLit:tlg0012.tlg002.perseus-grc2,22,392,Jesus,Jesus,male,male,other,...,male,male,other,mortal,Telemachus,Telemachus,male,male,other,mortal
1065,92,2343,urn:cts:greekLit:tlg0012.tlg002.perseus-grc2,15,65,Jesus,Jesus,male,male,other,...,male,male,other,mortal,Menelaus,Menelaus,male,male,other,mortal
