In [3]:
import json
import os
import pandas as pd

Analysis of specific stories

In [8]:
translation = 'More_translation'

# Per-character list fields that are copied over from each booknlp character entry.
ROLE_KEYS = ('agent', 'patient', 'mod', 'poss')

for story_name in os.listdir(translation):
    # Entries that are deliberately left out of the per-story analysis.
    if story_name == 'full_text' or story_name == 'medea' or story_name.endswith('.csv'):
        continue

    curr_dir = os.path.join(translation, story_name)
    # Anything that is not a folder (stray files next to the story folders)
    # cannot contain "<story>/<story>.book"; skip it instead of failing on open().
    if not os.path.isdir(curr_dir):
        continue

    # Read in the booknlp output. It is bound to `story_book` rather than `book`
    # so this loop does not overwrite the `book` variable that the whole-text
    # cells of this notebook operate on.
    with open(os.path.join(curr_dir, story_name + '.book'), encoding='utf-8') as f:
        story_book = json.load(f)

    # Read in the character indices: a mapping of character name -> list of
    # character ids used in the booknlp output.
    with open(os.path.join(curr_dir, story_name + '_char_ids.json'), encoding='utf-8') as f:
        char_to_idx = json.load(f)

    # Invert the mapping so that every id points at its character name.
    idx_to_char = {idx: char_name for char_name, indices in char_to_idx.items() for idx in indices}

    # Start with one empty record per listed character; characters that never
    # match a booknlp entry keep empty lists and an empty gender.
    story_char_info = {
        char_name: {'agent': [], 'patient': [], 'mod': [], 'poss': [], 'gender': ''}
        for char_name in char_to_idx
    }

    for char in story_book['characters']:
        # Only booknlp entries whose id was assigned to a character are used.
        if char['id'] not in idx_to_char:
            continue
        info = story_char_info[idx_to_char[char['id']]]

        # Several booknlp ids can map to one character, so the lists are concatenated.
        for key in ROLE_KEYS:
            info[key] += char[key]

        # The first entry that carries a gender decides the character's gender.
        if info['gender'] == '' and char['g'] is not None:
            info['gender'] = char['g']['argmax']

    # Save the merged per-character info next to the story's other files.
    with open(os.path.join(curr_dir, story_name + '_char_info.json'), 'w', encoding='utf-8') as f:
        json.dump(story_char_info, f)

General analysis of the whole text

In [5]:
# Folder that holds the whole-text input and receives the exported CSV.
path = 'drive/MyDrive/2022-2023/Ovid_Metamorphoses/'
# Load the .book JSON for the whole text. The encoding is given explicitly so
# the read does not depend on the platform's default text encoding.
with open(path + 'ovid.book', 'r', encoding='utf-8') as f:
  book = json.load(f)

In [76]:
# Build one summary record per character entry in `book`, keyed by character id.
# Entries without any proper/common-noun mention or without a gender are left out.
char_info = {}
for char in book['characters']:
  mentions = char['mentions']
  # skip if character is only referred to by pronouns
  if not (mentions['proper'] or mentions['common']):
    continue
  # skip if no gender information is attached to the character
  if char['g'] is None:
    continue

  # Proper-noun mentions come first, followed by common-noun mentions.
  names = [name['n'] for name in mentions['proper']] + [name['n'] for name in mentions['common']]

  # Named `char_id` (not `id`) so the builtin id() is not shadowed in the notebook.
  char_id = char['id']
  char_info[char_id] = {
      'names': names,
      'gender': char['g']['argmax'],
      'total_agent': len(char['agent']),
      'total_object': len(char['patient']),
      # `names` cannot be empty here because of the mention check above.
      'name': names[0],
  }

In [84]:
# Turn the per-character records into a table with one row per character;
# the character id moves out of the index into a regular 'index' column.
char_df = pd.DataFrame(char_info).T.reset_index()

In [85]:
# Remove characters whose agent and object counts add up to fewer than 10,
# then renumber the remaining rows from 0.
is_sparse = (char_df.total_agent + char_df.total_object) < 10
char_df = char_df[~is_sparse].reset_index(drop=True)

In [86]:
# Show the filtered character table as this cell's output.
char_df

Unnamed: 0,index,names,gender,total_agent,total_object,name
0,5864,"[the goddess, The goddess]",she/her,98,25,the goddess
1,499,"[Jupiter, Ammon, mighty Jupiter, Jupiter Ammon...",he/him/his,76,21,Jupiter
2,605,"[Jove, Hector, Ajax, mighty Jove, brave Ajax, ...",he/him/his,71,20,Jove
3,1482,"[the god, The god]",he/him/his,99,12,the god
4,1538,"[the gods, The gods]",they/them/their,56,25,the gods
...,...,...,...,...,...,...
187,6252,"[the priest, The priest]",he/him/his,9,1,the priest
188,7104,[the Colchian witch],she/her,10,0,the Colchian witch
189,1023,[Lelex],he/him/his,9,1,Lelex
190,1685,[Venus ’s son],he/him/his,10,1,Venus ’s son


In [87]:
# Share of each character's agent + object count that is agent.
char_df['perc_agent'] = char_df['total_agent'].div(
    char_df['total_agent'] + char_df['total_object']
)

In [88]:
# Write the character table (including its row index) to the data folder.
char_df.to_csv(path_or_buf=f'{path}gender_info_2.csv')

In [89]:
# Agent share per gender. This is a pooled ratio: the agent counts of all
# characters of a gender are summed and divided by their summed agent + object
# counts, so it is not the mean of the per-character `perc_agent` values.
def _agent_share(rows):
  """Return summed total_agent / (summed total_agent + summed total_object) for `rows`."""
  n_agent = rows['total_agent'].sum()
  n_object = rows['total_object'].sum()
  return n_agent / (n_agent + n_object)

genders = char_df['gender'].unique()
avg_gender_agent = {
    gender: _agent_share(char_df[char_df['gender'] == gender])
    for gender in genders
}

In [90]:
# Show the per-gender agent share (gender -> agent / (agent + object)).
avg_gender_agent

{'she/her': 0.7945454545454546,
 'he/him/his': 0.8196847366397539,
 'they/them/their': 0.7614942528735632}