# Towards a Conflict Heuristic (DH 2023)

## 04. Conflict Annotation Dictionary

Last updated: 26.01.2023

julian.haeussler[at]tu-darmstadt.de

In [1]:
# imports

import numpy as np
import pandas as pd
import pickle

In [2]:
# paths (absolute Windows locations on the author's machine)

# folder with the pre-processed annotation CSVs
path_annotations = r"C:\Users\Julian\HESSENBOX-DA\Projekte\Konflikte\Daten\Annotation\data_prepro"

# common parent folder of the data/result locations below
_conflict_heuristics_dir = r"C:\Users\Public\Data\conflict_heuristics"

path_data = _conflict_heuristics_dir + r"\pickled"

path_results = _conflict_heuristics_dir + r"\csv"

path_results_pkl = _conflict_heuristics_dir + r"\pickled"

#### create conflict dictionary

In [3]:
# read in the dataframe of the first annotated text ('Der blonde Eckbert')

df_eckbert = pd.read_csv(f"{path_annotations}\\eckbert_all_cleaned.csv")

In [4]:
# peek at the first rows: token column 'objekt' followed by the annotation columns
df_eckbert.head(18)

Unnamed: 0,objekt,alina,janis,kristina,mari,mareike,inna
0,ludwig,o,o,o,o,o,o
1,tieck,o,o,o,o,o,o
2,die,o,o,o,o,o,o
3,märchen,o,o,o,o,o,o
4,aus,o,o,o,o,o,o
5,dem,o,o,o,o,o,o
6,phantasus,o,o,o,o,o,o
7,der,o,o,o,o,o,o
8,blonde,o,o,o,o,o,o
9,eckbert,o,o,o,o,o,o


In [5]:
# read in the dataframes of the remaining three annotated texts
# NOTE(review): the file name is spelled 'karambuli' while the variable is
# 'df_krambambuli' - confirm the spelling matches the file on disk
df_judenbuche, df_krambambuli, df_verwandlung = (
    pd.read_csv(path_annotations + '\\' + file_name)
    for file_name in (
        'judenbuche_all_cleaned.csv',
        'karambuli_all_cleaned.csv',
        'verwandlung_all_cleaned.csv',
    )
)

In [6]:
# create token list over all novels (tokens keep their order within each text)

lst_tokens = []

for frame, token_column in (
    (df_eckbert, 'objekt'),
    (df_judenbuche, 'objekt'),
    (df_krambambuli, 'objekt'),
    (df_verwandlung, 'objekt_x'),  # the token column has a different name in this frame
):
    lst_tokens.extend(frame[token_column].to_list())

In [7]:
# first ten tokens of the combined list
lst_tokens[:10]

['ludwig',
 'tieck',
 'die',
 'märchen',
 'aus',
 'dem',
 'phantasus',
 'der',
 'blonde',
 'eckbert']

In [8]:
# number of tokens over all four texts
len(lst_tokens)

54543

In [9]:
# create type list over all novels: every distinct token once
# (the element order of a set is arbitrary)

lst_types = [*set(lst_tokens)]

In [10]:
# first ten types; at this point the list may still contain missing values (nan)
lst_types[:10]

[nan,
 'Geräuschen',
 'Vormittags',
 'schmetterte',
 'verschlungen',
 'blitz',
 'stückchen',
 'richtige',
 'Pelzboa',
 'Angst']

In [11]:
# remove nan: keep only real (string) tokens.
# Filtering by type instead of slicing off position 0 does not depend on where
# nan happens to land in the arbitrary set order, removes every nan entry, and
# cannot drop a real word when the cell is run a second time.
lst_types = [token_type for token_type in lst_types if isinstance(token_type, str)]

In [12]:
# number of case-sensitive types
len(lst_types)

8507

In [13]:
# lowercase type list (may introduce duplicates, e.g. 'Angst' / 'angst')

lst_types = list(map(str.lower, lst_types))

In [14]:
# drop the duplicates created by lowercasing (the resulting order is arbitrary)
lst_types = [*set(lst_types)]

In [15]:
# number of lowercased, de-duplicated types
len(lst_types)

7861

In [16]:
# first ten lowercased types
lst_types[:10]

['schmetterte',
 'verschlungen',
 'blitz',
 'stückchen',
 'richtige',
 'ausrede',
 'förster',
 'fuße',
 'mann',
 'leuchtenden']

In [17]:
# select tags for dictionary: the annotation labels that are counted per token

tags = [
    "emotionsbasierter_konfliktindikator",
    "zustandsbasierter_konfliktindikator",
    "aktionsbasierter_konfliktindikator",
    "konflikthinweis",
    "indikator_konfliktlösung",
    "indikator_konfliktintensität",
]

In [18]:
# create dictionary: word -> tag -> list of relative tag frequencies.
# Every tag gets its own empty list; the loops over the texts append one
# value per occurrence of the word.

conflict_dict = {}

for word in lst_types:
    conflict_dict[word] = {tag: [] for tag in tags}

In [19]:
# spot check: a freshly created entry has an empty list for every tag
conflict_dict['ausgerenktem']

{'emotionsbasierter_konfliktindikator': [],
 'zustandsbasierter_konfliktindikator': [],
 'aktionsbasierter_konfliktindikator': [],
 'konflikthinweis': [],
 'indikator_konfliktlösung': [],
 'indikator_konfliktintensität': []}

In [20]:
# iterate over the novel and add the relative frequency of each tag, per token
# occurrence, as a list entry to the dictionary
# NOTE: the cell appends to conflict_dict; running it twice without re-creating
# the dictionary adds every occurrence a second time.

n_annotators = 6  # no. of annotators for this text

for row_label, row in df_eckbert.iterrows():
    values = row.tolist()  # token + annotations as list
    token = values[0]  # first item in list is token
    if not isinstance(token, str):
        continue  # missing token (nan): nothing to look up in the dictionary
    word = token.lower()
    annotations = values[1:]  # leave the token itself out of the tag count
    for tag in tags:
        # share of annotators who assigned this tag to the token
        conflict_dict[word][tag].append(annotations.count(tag) / n_annotators)

In [21]:
# spot check: one list entry per occurrence of 'ritter' processed so far
conflict_dict['ritter']

{'emotionsbasierter_konfliktindikator': [0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0],
 'zustandsbasierter_konfliktindikator': [0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0],
 'aktionsbasierter_konfliktindikator': [0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0],
 'konflikthinweis': [0.16666666666666666,
  0.16666666666666666,
  0.16666666666666666,
  0.16666666666666666,
  0.0,
  0.0,
  0.16666666666666666,
  0.16666666666666666],
 'indikator_konfliktlösung': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
 'indikator_konfliktintensität': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]}

In [22]:
# add the relative tag frequencies of every token occurrence in 'Judenbuche'
# NOTE: the cell appends to conflict_dict; running it twice without re-creating
# the dictionary adds every occurrence a second time.

n_annotators = 6  # no. of annotators for this text

for row_label, row in df_judenbuche.iterrows():
    values = row.tolist()  # token + annotations as list
    token = values[0]  # first item in list is token
    if not isinstance(token, str):
        continue  # missing token (nan): nothing to look up in the dictionary
    word = token.lower()
    annotations = values[1:]  # leave the token itself out of the tag count
    for tag in tags:
        # share of annotators who assigned this tag to the token
        conflict_dict[word][tag].append(annotations.count(tag) / n_annotators)

In [23]:
# add the relative tag frequencies of every token occurrence in 'Krambambuli'
# NOTE: the cell appends to conflict_dict; running it twice without re-creating
# the dictionary adds every occurrence a second time.

n_annotators = 6  # no. of annotators for this text

for row_label, row in df_krambambuli.iterrows():
    values = row.tolist()  # token + annotations as list
    token = values[0]  # first item in list is token
    if not isinstance(token, str):
        continue  # missing token (nan): nothing to look up in the dictionary
    word = token.lower()
    annotations = values[1:]  # leave the token itself out of the tag count
    for tag in tags:
        # share of annotators who assigned this tag to the token
        conflict_dict[word][tag].append(annotations.count(tag) / n_annotators)

In [24]:
# add the relative tag frequencies of every token occurrence in 'Verwandlung'
# NOTE: the cell appends to conflict_dict; running it twice without re-creating
# the dictionary adds every occurrence a second time.

n_annotators = 5  # fewer annotators for 'Verwandlung'

for row_label, row in df_verwandlung.iterrows():
    values = row.tolist()  # token + annotations as list
    # NOTE(review): assumes the token column is the first column of this frame - confirm
    token = values[0]
    if not isinstance(token, str):
        continue  # this text contains missing tokens (nan); they are skipped
    word = token.lower()
    annotations = values[1:]  # leave the token itself out of the tag count
    for tag in tags:
        # share of annotators who assigned this tag to the token
        conflict_dict[word][tag].append(annotations.count(tag) / n_annotators)

In [25]:
# spot check: the entry now holds one value per tag for each occurrence of the word
conflict_dict['ausgerenktem']

{'emotionsbasierter_konfliktindikator': [0.0],
 'zustandsbasierter_konfliktindikator': [1.0],
 'aktionsbasierter_konfliktindikator': [0.0],
 'konflikthinweis': [0.0],
 'indikator_konfliktlösung': [0.0],
 'indikator_konfliktintensität': [0.0]}

In [26]:
# compute average value for each tag for each word
# (start with 0 for every tag; the averages are filled in by the next cell)

conflict_dict_final = {word: dict.fromkeys(tags, 0) for word in lst_types}

In [27]:
# average the per-occurrence frequencies of every tag; a tag without any
# collected value keeps its initial 0
for word in lst_types:
    collected = conflict_dict[word]
    for tag in tags:
        frequencies = collected[tag]
        if frequencies:
            conflict_dict_final[word][tag] = sum(frequencies) / len(frequencies)

In [28]:
# spot check: averaged tag frequencies for 'ritter'
conflict_dict_final["ritter"]

{'emotionsbasierter_konfliktindikator': 0.0,
 'zustandsbasierter_konfliktindikator': 0.0,
 'aktionsbasierter_konfliktindikator': 0.0,
 'konflikthinweis': 0.12499999999999999,
 'indikator_konfliktlösung': 0.0,
 'indikator_konfliktintensität': 0.0}

In [29]:
# turn into df: one row per word (index), one column per tag

df_conflict_final = pd.DataFrame.from_dict(
    data=conflict_dict_final,
    orient='index',
)

In [30]:
# first rows of the word-by-tag table
df_conflict_final.head()

Unnamed: 0,emotionsbasierter_konfliktindikator,zustandsbasierter_konfliktindikator,aktionsbasierter_konfliktindikator,konflikthinweis,indikator_konfliktlösung,indikator_konfliktintensität
schmetterte,0.0,0.0,0.0,0.0,0.0,0.0
verschlungen,0.0,0.0,0.0,0.0,0.0,0.0
blitz,0.0,0.0,0.0,0.5,0.0,0.0
stückchen,0.0,0.0,0.0,0.0,0.0,0.0
richtige,0.0,0.0,0.0,0.0,0.0,0.0


In [31]:
# position of 'ritter' in the type list
# NOTE: lst_types was built from a set, so this position is not stable
lst_types.index("ritter")

5789

In [32]:
# inspect the row of a frequent word; looked up by label because row positions
# follow the arbitrary order of the type list and change between runs
df_conflict_final.loc[['aus']]

Unnamed: 0,emotionsbasierter_konfliktindikator,zustandsbasierter_konfliktindikator,aktionsbasierter_konfliktindikator,konflikthinweis,indikator_konfliktlösung,indikator_konfliktintensität
aus,0.002132,0.001938,0.016085,0.001938,0.015891,0.002907


In [33]:
# save as csv ('utf-8-sig' writes a byte-order mark in front of the UTF-8 data)

df_conflict_final.to_csv(
    f"{path_results}\\230126_df_conflict_final.csv",
    encoding='utf-8-sig',
)

#### get label words

In [34]:
# copy df so that the columns added below do not end up in df_conflict_final

df_conflict_labels = df_conflict_final.copy(deep=True)

In [35]:
# first rows of the working copy
df_conflict_labels.head()

Unnamed: 0,emotionsbasierter_konfliktindikator,zustandsbasierter_konfliktindikator,aktionsbasierter_konfliktindikator,konflikthinweis,indikator_konfliktlösung,indikator_konfliktintensität
schmetterte,0.0,0.0,0.0,0.0,0.0,0.0
verschlungen,0.0,0.0,0.0,0.0,0.0,0.0
blitz,0.0,0.0,0.0,0.5,0.0,0.0
stückchen,0.0,0.0,0.0,0.0,0.0,0.0
richtige,0.0,0.0,0.0,0.0,0.0,0.0


In [36]:
# add column average conflict (placeholder, filled in further below)
# NOTE: re-running this cell resets the column to NaN

df_conflict_labels['avrg_conflict'] = float('nan')

In [37]:
# the new column is still empty (NaN) at this point
df_conflict_labels.head()

Unnamed: 0,emotionsbasierter_konfliktindikator,zustandsbasierter_konfliktindikator,aktionsbasierter_konfliktindikator,konflikthinweis,indikator_konfliktlösung,indikator_konfliktintensität,avrg_conflict
schmetterte,0.0,0.0,0.0,0.0,0.0,0.0,
verschlungen,0.0,0.0,0.0,0.0,0.0,0.0,
blitz,0.0,0.0,0.0,0.5,0.0,0.0,
stückchen,0.0,0.0,0.0,0.0,0.0,0.0,
richtige,0.0,0.0,0.0,0.0,0.0,0.0,


In [38]:
# labels conflict: the tags that enter the average conflict score
# ('indikator_konfliktlösung' is not included)

tags_conflict = [
    "emotionsbasierter_konfliktindikator",
    "zustandsbasierter_konfliktindikator",
    "aktionsbasierter_konfliktindikator",
    "konflikthinweis",
    "indikator_konfliktintensität",
]

In [39]:
# fill column average conflict: mean of the conflict-tag values of each word

conflict_sum = 0
for tag in tags_conflict:
    # add whole columns in list order; each word gets exactly the sum it would
    # get from adding its values one by one in that order
    conflict_sum = conflict_sum + df_conflict_labels[tag]

# divide by the no. of labels (taken from the list so both cannot drift apart)
df_conflict_labels['avrg_conflict'] = conflict_sum / len(tags_conflict)

In [40]:
# spot check: average conflict score of a single word
df_conflict_labels.at['ausgerenktem','avrg_conflict']

0.2

In [41]:
# get the rows (words) sorted by average conflict score, highest first

df_high_conflict = (
    df_conflict_labels
    .sort_values(by='avrg_conflict', ascending=False)
    .copy()
)

In [42]:
# words with the highest average conflict score
df_high_conflict.head()

Unnamed: 0,emotionsbasierter_konfliktindikator,zustandsbasierter_konfliktindikator,aktionsbasierter_konfliktindikator,konflikthinweis,indikator_konfliktlösung,indikator_konfliktintensität,avrg_conflict
starrsinn,0.0,0.55,0.0,0.45,0.0,0.0,0.2
butterdieb,0.0,0.5,0.0,0.5,0.0,0.0,0.2
zerbleute,0.0,0.0,1.0,0.0,0.0,0.0,0.2
totgeprügelt,0.0,1.0,0.0,0.0,0.0,0.0,0.2
leichnam,0.0,0.166667,0.0,0.833333,0.0,0.0,0.2


In [43]:
# label words for high conflict: average conflict score of 0.2, i.e. the five
# conflict-tag values of the word sum to 1.
# The scores are computed floats, so they are compared with a small tolerance
# instead of '==' (a rounding error of one bit would otherwise drop the word).
# The order of df_high_conflict is kept.
is_max_conflict = np.isclose(df_high_conflict['avrg_conflict'], 0.2, rtol=0, atol=1e-9)

labels_high_conflict = df_high_conflict.index[is_max_conflict].tolist()

In [44]:
# show the selected high-conflict label words
labels_high_conflict

['starrsinn',
 'butterdieb',
 'zerbleute',
 'totgeprügelt',
 'leichnam',
 'ängstlichem',
 'geschimpft',
 'habsucht',
 'ermordung',
 'weinkrampf',
 'vorgeworfen',
 'zerschlagenen',
 'entzweiten',
 'blutdurst',
 'bombardieren',
 'niederknallen',
 'züchtigte',
 'wimmernd',
 'blutrünstig',
 'bedenklicher',
 'mißbrauchen',
 'lumpenpack',
 'gehorchte',
 'blutfleck',
 'schimpfte',
 'schelten',
 'anschrie',
 'tränenstrom',
 'frechheit',
 'aufruhr',
 'grausamer',
 'ausgerenktem',
 'prügeln',
 'donnerwetter',
 'lumpenhund',
 'niederzubrennen',
 'grausamste',
 'verdammnis',
 'entriß',
 'durchgeweint',
 'zerschlagenem',
 'bangigkeit',
 'skandal',
 'ruinieren',
 'ballte',
 'fletschte',
 'katastrophen',
 'grausam',
 'grimmigster',
 'vermaledeiter',
 'peinigte',
 'verfluchte',
 'stach',
 'messerchen',
 'verleumdung',
 'zurechtweisungen',
 'verfluchter',
 'belogen',
 'manipulation',
 'schimpf',
 'schauder',
 'durchgedroschen',
 'drohungen',
 'beschimpfen',
 'ermordeten',
 'zerschlagen',
 'schlägereien

In [45]:
# number of high-conflict label words
len(labels_high_conflict)

72

In [46]:
# rows (words) sorted by their conflict-resolution value, highest first
df_low_conflict = (
    df_conflict_labels
    .sort_values(by='indikator_konfliktlösung', ascending=False)
    .copy()
)

In [47]:
# words with the highest conflict-resolution value
df_low_conflict.head(20)

Unnamed: 0,emotionsbasierter_konfliktindikator,zustandsbasierter_konfliktindikator,aktionsbasierter_konfliktindikator,konflikthinweis,indikator_konfliktlösung,indikator_konfliktintensität,avrg_conflict
tröstete,0.0,0.0,0.0,0.0,0.833333,0.0,0.0
besänftigen,0.0,0.0,0.0,0.0,0.8,0.0,0.0
söhnte,0.0,0.0,0.0,0.0,0.8,0.0,0.0
übereinkommen,0.0,0.0,0.0,0.0,0.8,0.0,0.0
tröstende,0.0,0.0,0.0,0.0,0.666667,0.0,0.0
reich,0.0,0.0,0.0,0.0,0.666667,0.0,0.0
versöhnt,0.0,0.0,0.0,0.0,0.666667,0.0,0.0
beruhigte,0.0,0.0,0.0,0.0,0.65,0.0,0.0
überstehen,0.0,0.0,0.0,0.0,0.6,0.0,0.0
erträglich,0.0,0.0,0.0,0.0,0.6,0.0,0.0


In [48]:
# label words for low conflict: conflict-resolution value above 0.4
# (author's note: >0.3 -> in total 100 words); the order of df_low_conflict is kept
resolution_share = df_low_conflict['indikator_konfliktlösung']

labels_low_conflict = df_low_conflict.index[(resolution_share > 0.4).to_numpy()].tolist()

In [49]:
# show the selected low-conflict label words
labels_low_conflict

['tröstete',
 'besänftigen',
 'söhnte',
 'übereinkommen',
 'tröstende',
 'reich',
 'versöhnt',
 'beruhigte',
 'überstehen',
 'erträglich',
 'beruhigen',
 'erlösung',
 'schonung',
 'gewonnen',
 'schonen',
 'beruhigt',
 'bittend',
 'alibi',
 'abhängig',
 'gerückt',
 'trösten',
 'freisprechen',
 'ausweg',
 'entgegenkommen',
 'leichterm',
 'besserung']

In [50]:
# number of low-conflict label words
len(labels_low_conflict)

26

In [51]:
# save the high-conflict label words as a pickle file

with open(f"{path_results_pkl}\\230126_lst_labels_high_conflict.pkl", mode='wb') as pkl_file:
    pickle.dump(labels_high_conflict, pkl_file)

In [52]:
# save the low-conflict label words as a pickle file
with open(f"{path_results_pkl}\\230126_lst_labels_low_conflict.pkl", mode='wb') as pkl_file:
    pickle.dump(labels_low_conflict, pkl_file)