# A hierarchy of Semantic Roles

In [1]:
# Relative directory prefix for the input CSV files read below and for the CSV written at the end.
PATH = 'datasets/'

import pandas as pd
import collections

from tf.app import use

In [2]:
# Load the BHSA Text-Fabric app together with the participants/actor feature module.
# hoist=globals() places the Text-Fabric API handles in the notebook namespace; the
# cells below rely on `F` (feature lookup) and `L` (up/down navigation between nodes),
# which are not defined anywhere else in this notebook.
A = use('bhsa', hoist=globals(), mod='ch-jensen/participants/actor/tf')

	connecting to online GitHub repo annotation/app-bhsa ... connected
Using TF-app in C:\Users\Ejer/text-fabric-data/annotation/app-bhsa/code:
	rv1.2=#5fdf1778d51d938bfe80b37b415e36618e50190c (latest release)
	connecting to online GitHub repo etcbc/bhsa ... connected
Using data in C:\Users\Ejer/text-fabric-data/etcbc/bhsa/tf/c:
	rv1.6=#bac4a9f5a2bbdede96ba6caea45e762fe88f88c5 (latest release)
	connecting to online GitHub repo etcbc/phono ... connected
Using data in C:\Users\Ejer/text-fabric-data/etcbc/phono/tf/c:
	r1.2=#1ac68e976ee4a7f23eb6bb4c6f401a033d0ec169 (latest release)
	connecting to online GitHub repo etcbc/parallels ... connected
Using data in C:\Users\Ejer/text-fabric-data/etcbc/parallels/tf/c:
	r1.2=#395dfe2cb69c261862fab9f0289e594a52121d5c (latest release)
	connecting to online GitHub repo ch-jensen/participants ... connected
Using data in C:\Users\Ejer/text-fabric-data/ch-jensen/participants/actor/tf/c:
	r1.7=#1c17398f92c0836c06de5e1798687c3fa18133cf (latest release)
   |  

## 1. Parsing participants in Leviticus 17-26

The participants of Leviticus 17-26 need to be parsed according to the three parameters (Instigation, Volition, and Affectedness) that were annotated in other notebooks. Parsing is necessary because negations (typically L> לא) cancel out some of these features, according to Næss (2007, 114-117). 

In [3]:
# Node-id columns are read with the nullable 'Int64' dtype, so that rows
# without a first or second undergoer can keep an empty value.
int_cols = dict.fromkeys(('clause', 'Act_phr', 'Und1_phr', 'Und2_phr'), 'Int64')

# Volition/affectedness annotations for the actor and up to two undergoers per row.
aff_vol = pd.read_csv(
    f'{PATH}Lev17-26.Volition_Affectedness_all_cor_4.csv',
    dtype=int_cols,
)
aff_vol.head(20)

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,clause,lex,Act_phr,Act_vol,Act_aff,Und1_phr,Und1_vol,Und1_aff,Und2_phr,Und2_vol,Und2_aff,comment
0,0,0,439650,DBR[,688348.0,y,n,688349.0,y,y,,,,
1,1,1,439651,>MR[,688350.0,y,n,,,,,,,
2,2,2,439652,DBR[,688351.0,y,n,688352.0,y,y,,,,
3,3,3,439653,>MR[,688354.0,y,n,688355.0,y,y,,,,
4,4,4,439655,YWH[,688360.0,y,n,688358.0,n,n,,,,
5,5,5,439656,>MR[,688361.0,y,n,,,,,,,
6,6,6,439658,CXV[,688364.0,y,n,688365.0,n,y,,,,
7,7,7,439659,CXV[,688369.0,y,n,,,,,,,
8,8,8,439660,BW>[,688374.0,y,n,688374.0,n,y,688372.0,n,n,
9,9,9,439661,QRB[,688375.0,y,n,688376.0,n,y,688377.0,n,n,


In [4]:
# Instigation annotations; the `phr` column (node ids) is used below to decide
# which participants count as instigators.
inst = pd.read_csv(f'{PATH}Lev17-26.Instigation_final.csv')
inst.head()

Unnamed: 0,clause,phr,Inst
0,439650,688348,y
1,439651,688350,y
2,439652,688351,y
3,439653,688354,y
4,439655,688360,y


In [5]:
# Collect [volition, affectedness] for every participant reference, keyed by node id.
# Missing references (empty Und1/Und2 cells) are skipped instead of being stored under
# a NaN/pd.NA key or passed on to the Text-Fabric API.

# Phrase functions that signal an object suffix on the predicate.
suffix_functions = {'PreO', 'PtcO'}

new_dict = {}

for row_label, rec in aff_vol.iterrows():
    Act = rec.Act_phr
    Und1 = rec.Und1_phr
    Und2 = rec.Und2_phr

    # Only references that are actually filled in can be looked up.
    present = [ref for ref in (Act, Und1, Und2) if pd.notna(ref)]

    # If one of the referenced phrases carries an object suffix, the first undergoer
    # is stored on the verb word node to avoid confusion with the predicate phrase
    # itself (which may also be the actor reference).
    if pd.notna(Und1) and any(F.function.v(ref) in suffix_functions for ref in present):
        for w in L.d(Und1, 'word'):
            if F.sp.v(w) == 'verb':
                Und1 = w

    # Same assignment order as before (actor, undergoer 1, undergoer 2), so a
    # later participant with the same key still overrides an earlier one.
    participants = (
        (Act, rec.Act_vol, rec.Act_aff),
        (Und1, rec.Und1_vol, rec.Und1_aff),
        (Und2, rec.Und2_vol, rec.Und2_aff),
    )
    for ref, vol, aff in participants:
        if pd.notna(ref):
            new_dict[ref] = [vol, aff]

In [6]:
#new_dict

In [7]:
# Combine volition/affectedness with instigation into one table indexed by node id.

# Node ids annotated as instigators; a set avoids rebuilding a list for every lookup.
instigators = set(inst.phr)

new_dict_2 = {}

for ref, vol_aff in new_dict.items():
    # Skip missing references: NaN fails `> 0`, and pd.NA would raise
    # "boolean value of NA is ambiguous" if compared directly.
    if pd.isna(ref) or not ref > 0:
        continue

    # Build a fresh list instead of appending to the list owned by new_dict;
    # otherwise re-running this cell would append a second 'Inst' value.
    new_dict_2[ref] = [*vol_aff, 'y' if ref in instigators else 'n']

# from_dict labels the columns directly and also copes with an empty dictionary.
df = pd.DataFrame.from_dict(new_dict_2, orient='index', columns=['Vol', 'Aff', 'Inst'])

In [8]:
# Preview: one row per participant node with its Vol, Aff and Inst values.
df.head()

Unnamed: 0,Vol,Aff,Inst
688348,y,n,y
688349,y,y,n
688350,y,n,y
688351,y,n,y
688352,y,y,n


In the previous work, not all Complement phrases were annotated. Therefore, we identify all remaining Complements and annotate them as negative ('n') on all three parameters:

In [9]:
# Search template: every phrase with function Cmpl inside a clause of Leviticus 17-26.
cmpl_template = '''
book book=Leviticus
 chapter chapter=17|18|19|20|21|22|23|24|25|26
  clause
   phrase function=Cmpl
'''

# The template has four lines (book, chapter, clause, phrase); the phrase is
# read from position 3 of each result further down.
all_Cmpl = A.search(cmpl_template)

  0.47s 481 results


In [10]:
# Complement phrases that are not yet in the annotation table are added with
# all three parameters set to 'n'.
add_complements = {}

for r in all_Cmpl:
    Cmpl = r[3]  # the phrase node is the fourth element of each search result

    if Cmpl not in df.index:
        add_complements[Cmpl] = ['n', 'n', 'n']

# from_dict with explicit columns also yields a correctly labelled, empty frame
# when nothing is missing (e.g. when this cell is run a second time); assigning
# three column names to an empty transposed frame would raise a ValueError.
complements_df = pd.DataFrame.from_dict(
    add_complements, orient='index', columns=['Vol', 'Aff', 'Inst']
)

# Only concatenate when there is something to add, so a re-run leaves df unchanged.
if not complements_df.empty:
    df = pd.concat([df, complements_df])

Before parsing, we walk through each phrase to see if it occurs in a clause with a negation. Negations occur in negative phrases (NegP) but also as part of verb phrases, e.g. Lev 18:30:

In [11]:
#A.pretty(T.nodeFromSection(('Leviticus', 18, 30)))

In [12]:
# Flag every participant whose clause contains a negation.
negations = []

for ref in df.index:
    cl = L.u(ref, 'clause')[0]

    # A clause counts as negated when it contains a negative phrase (NegP) or
    # when the negative lexeme BLT/ occurs inside one of its verbal phrases (VP).
    neg = any(
        F.typ.v(phr) == 'NegP'
        or (
            F.typ.v(phr) == 'VP'
            and any(F.lex.v(w) == 'BLT/' for w in L.d(phr, 'word'))
        )
        for phr in L.d(cl, 'phrase')
    )

    negations.append('neg' if neg else '')

# Plain column assignment appends 'neg' after Vol/Aff/Inst on the first run (the
# same position df.insert(3, ...) produced) and simply overwrites it on a re-run,
# where df.insert would raise because the column already exists.
df['neg'] = negations

In [13]:
# Semantic roles as feature triples in the order [Volition, Instigation, Affectedness]
# ('y' = feature present, 'n' = feature absent). The values are lists because they
# are compared against list-valued triples when roles are assigned.
roles = {
    name: list(features)
    for name, features in (
        ('Agent', 'yyn'),
        ('Force', 'nyn'),
        ('Affected Agent', 'yyy'),
        ('Instrument', 'nyy'),
        ('Volitional Undergoer', 'yny'),
        ('Frustrative', 'ynn'),
        ('Neutral', 'nnn'),
        ('Patient', 'nny'),
    )
}

In [14]:
# Assign a role to every participant from its (Vol, Inst, Aff) triple.

# Reverse lookup: feature triple -> role name (tuples, because lists are not hashable).
role_by_features = {tuple(features): name for name, features in roles.items()}

# Triples that match no defined role (e.g. those containing '?') get the empty label.
role_list = [
    role_by_features.get((row.Vol, row.Inst, row.Aff), '')
    for n, row in df.iterrows()
]

# Column assignment appends 'role' after 'neg' on the first run (the position
# df.insert(4, ...) produced) and overwrites it on a re-run, where df.insert
# would raise because the column already exists.
df['role'] = role_list

#### Verify role annotation:

In [15]:
# Number of participants per role; the empty label counts rows that matched no role.
pd.crosstab(index=df.role, columns='count')

col_0,count
role,Unnamed: 1_level_1
,38
Affected Agent,144
Agent,496
Force,9
Instrument,17
Neutral,407
Patient,539
Volitional Undergoer,244


38 participants have not been assigned a role because their feature combinations fall outside the schema. These cases need to be inspected:

In [16]:
# Rows whose role is still the empty string, i.e. whose feature triple matched no entry in `roles`.
inspect_data = df[df.role == '']
inspect_data

Unnamed: 0,Vol,Aff,Inst,neg,role
688711,?,y,n,neg,
688730,?,y,n,neg,
688771,?,n,y,,
688797,?,n,y,neg,
688802,?,n,y,,
689035,?,y,n,,
689144,?,n,y,neg,
689188,?,?,n,,
689336,?,y,n,,
689339,?,y,n,,


All cases involve a question mark, which indicates an ambiguous case. These cases are handled consistently and conservatively: an ambiguous value is treated as negative.

In [17]:
# Treat every ambiguous '?' value as 'n'. The result is bound to a new name,
# so `df` keeps the original annotations.
upd_df = df.replace('?', 'n')

Now we can add the last roles:

In [18]:
# Feature triple (Vol, Inst, Aff) -> role name, for direct lookup.
role_by_features = {tuple(features): name for name, features in roles.items()}

for ref, record in upd_df.iterrows():
    # Participants that already have a role are left untouched.
    if record.role:
        continue

    triple = (record.Vol, record.Inst, record.Aff)
    # Falls back to the empty label when the triple still matches no role.
    upd_df.at[ref, 'role'] = role_by_features.get(triple, '')

Last check:

In [19]:
# Re-count the roles after the '?' values were replaced; no empty label should remain.
pd.crosstab(index=upd_df.role, columns='count')

col_0,count
role,Unnamed: 1_level_1
Affected Agent,144
Agent,496
Force,21
Instrument,17
Neutral,413
Patient,559
Volitional Undergoer,244


All cases have been successfully annotated.

### 1.b Parsing

In [20]:
# Reorder the columns to Vol, Inst, Aff - the order in which the role triples are defined.
upd_df = upd_df[['Vol', 'Inst', 'Aff', 'neg', 'role']]

In [21]:
# Role under negation: annotated role -> role assigned when the clause is negated.
# NOTE(review): 'Frustrative' has no entry, so a negated participant annotated as
# Frustrative would raise a KeyError when looked up with parsing_rules[role].
parsing_rules = dict([
    ('Agent', 'Frustrative'),
    ('Force', 'Neutral'),
    ('Affected Agent', 'Frustrative'),
    ('Instrument', 'Patient'),
    ('Volitional Undergoer', 'Frustrative'),
    ('Neutral', 'Neutral'),
    ('Patient', 'Neutral'),
])

In [22]:
# A participant in a negated clause gets the role prescribed by parsing_rules;
# everyone else (and any participant without a role) keeps the annotated role.
new_roles = [
    parsing_rules[record.role] if record.neg and record.role else record.role
    for ref, record in upd_df.iterrows()
]

upd_df.insert(5, "new_role", new_roles)

In [23]:
# Preview: annotated role next to the role after negation has been taken into account.
upd_df.head()

Unnamed: 0,Vol,Inst,Aff,neg,role,new_role
688348,y,y,n,,Agent,Agent
688349,y,n,y,,Volitional Undergoer,Volitional Undergoer
688350,y,y,n,,Agent,Agent
688351,y,y,n,,Agent,Agent
688352,y,n,y,,Volitional Undergoer,Volitional Undergoer


## 2. Establishing a hierarchy of semantic roles

In [24]:
# The eight roles as binary feature triples [Volition, Instigation, Affectedness].
roles = {
    'Agent':                [1, 1, 0],
    'Force':                [0, 1, 0],
    'Affected Agent':       [1, 1, 1],
    'Instrument':           [0, 1, 1],
    'Volitional Undergoer': [1, 0, 1],
    'Frustrative':          [1, 0, 0],
    'Neutral':              [0, 0, 0],
    'Patient':              [0, 0, 1],
}

# One row per role, one column per feature.
data = pd.DataFrame.from_dict(
    roles,
    orient='index',
    columns=['Volition', 'Instigation', 'Affectedness'],
)
data

Unnamed: 0,Volition,Instigation,Affectedness
Agent,1,1,0
Force,0,1,0
Affected Agent,1,1,1
Instrument,0,1,1
Volitional Undergoer,1,0,1
Frustrative,1,0,0
Neutral,0,0,0
Patient,0,0,1


We now sort the dataframe according to three criteria [+Instigation], [-Affectedness], and [+Volition] in this respective order:

In [25]:
# Descending Instigation, then ascending Affectedness, then descending Volition.
data = data.sort_values(by=['Instigation','Affectedness','Volition'], ascending=[False, True, False])

Each role is given a rank according to the degree of agency:

In [26]:
# Ranks are handed out by row position in the sorted table: 5 for the first
# row, counting down to -2 for the eighth.
rank = list(range(5, -3, -1))

data.insert(3, 'Rank', rank)

In [27]:
# Show the sorted role table together with the assigned ranks.
data

Unnamed: 0,Volition,Instigation,Affectedness,Rank
Agent,1,1,0,5
Force,0,1,0,4
Affected Agent,1,1,1,3
Instrument,0,1,1,2
Frustrative,1,0,0,1
Neutral,0,0,0,0
Volitional Undergoer,1,0,1,-1
Patient,0,0,1,-2


Now we can add an agency rank to all participant references:

In [28]:
# Role name -> agency rank, as plain Python integers.
rank_of = dict(zip(data.index, data.Rank.tolist()))

#Original role
old_rank = [rank_of[role] for role in upd_df.role]
upd_df.insert(6, 'rank', old_rank)

#New role (inserted at the same position, so it ends up in front of 'rank')
new_rank = [rank_of[role] for role in upd_df.new_role]
upd_df.insert(6, 'new_rank', new_rank)

In [29]:
# Preview: roles before and after negation with their agency ranks.
upd_df.head()

Unnamed: 0,Vol,Inst,Aff,neg,role,new_role,new_rank,rank
688348,y,y,n,,Agent,Agent,5,5
688349,y,n,y,,Volitional Undergoer,Volitional Undergoer,-1,-1
688350,y,y,n,,Agent,Agent,5,5
688351,y,y,n,,Agent,Agent,5,5
688352,y,n,y,,Volitional Undergoer,Volitional Undergoer,-1,-1


In [30]:
# Write the final table, including its node-id index, to role_ranks.csv under PATH.
upd_df.to_csv(f'{PATH}role_ranks.csv')