# Towards a Conflict Heuristic (DH 2023)

## 05. Sentiment Analysis

Last updated: 20.04.2023

julian.haeussler[at]tu-darmstadt.de

In [1]:
# imports

import os
from gensim.models import KeyedVectors
import glob
import math
from matplotlib import pyplot as plt
import numpy as np
import os
import pandas as pd
import pickle
import re
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# read in corpora

# Romantik core corpus, lemmatized: one list of phrases per novel
with open(
    '../Analyseergebnisse/pickled/230116_lst_lists_phrases_Romantik_core_LEMMATIZED.pkl',
    'rb',
) as pkl_file:
    lst_lists_phrases_Romantik_core_LEMMATIZED = pickle.load(pkl_file)

In [3]:
# Realismus corpus, lemmatized: one list of phrases per novel
with open(
    '../Analyseergebnisse/pickled/230116_lst_lists_phrases_Realismus_LEMMATIZED.pkl',
    'rb',
) as pkl_file:
    lst_lists_phrases_Realismus_LEMMATIZED = pickle.load(pkl_file)

In [4]:
# Naturalismus corpus, lemmatized: one list of phrases per novel
with open(
    '../Analyseergebnisse/pickled/230116_lst_lists_phrases_Naturalismus_LEMMATIZED.pkl',
    'rb',
) as pkl_file:
    lst_lists_phrases_Naturalismus_LEMMATIZED = pickle.load(pkl_file)

In [5]:
# Romantik core corpus, tokenized: one list of phrases per novel
with open(
    '../Analyseergebnisse/pickled/230116_lst_lists_phrases_Romantik_core_TOKENIZED.pkl',
    'rb',
) as pkl_file:
    lst_lists_phrases_Romantik_core_TOKENIZED = pickle.load(pkl_file)

In [6]:
# Realismus corpus, tokenized: one list of phrases per novel
with open(
    '../Analyseergebnisse/pickled/230116_lst_lists_phrases_Realismus_TOKENIZED.pkl',
    'rb',
) as pkl_file:
    lst_lists_phrases_Realismus_TOKENIZED = pickle.load(pkl_file)

In [7]:
# Naturalismus corpus, tokenized: one list of phrases per novel
with open(
    '../Analyseergebnisse/pickled/230116_lst_lists_phrases_Naturalismus_TOKENIZED.pkl',
    'rb',
) as pkl_file:
    lst_lists_phrases_Naturalismus_TOKENIZED = pickle.load(pkl_file)

In [8]:
# create phrase lists and word lists

# novels -> one flat list of phrases (corpus order is kept)
lst_phrases_Romantik_core_LEMMATIZED = [
    vp
    for lst_novel in lst_lists_phrases_Romantik_core_LEMMATIZED
    for vp in lst_novel
]

# phrases -> one flat list of words (duplicates are kept)
lst_words_Romantik_core_LEMMATIZED = [
    lemma
    for vp in lst_phrases_Romantik_core_LEMMATIZED
    for lemma in vp
]

In [9]:
# novels -> one flat list of phrases (corpus order is kept)
lst_phrases_Realismus_LEMMATIZED = [
    vp
    for lst_novel in lst_lists_phrases_Realismus_LEMMATIZED
    for vp in lst_novel
]

# phrases -> one flat list of words (duplicates are kept)
lst_words_Realismus_LEMMATIZED = [
    lemma
    for vp in lst_phrases_Realismus_LEMMATIZED
    for lemma in vp
]

In [10]:
# novels -> one flat list of phrases (corpus order is kept)
lst_phrases_Naturalismus_LEMMATIZED = [
    vp
    for lst_novel in lst_lists_phrases_Naturalismus_LEMMATIZED
    for vp in lst_novel
]

# phrases -> one flat list of words (duplicates are kept)
lst_words_Naturalismus_LEMMATIZED = [
    lemma
    for vp in lst_phrases_Naturalismus_LEMMATIZED
    for lemma in vp
]

In [11]:
# novels -> one flat list of tokenized phrases (corpus order is kept)
lst_phrases_Romantik_core_TOKENIZED = [
    vp
    for lst_novel in lst_lists_phrases_Romantik_core_TOKENIZED
    for vp in lst_novel
]

In [12]:
# novels -> one flat list of tokenized phrases (corpus order is kept)
lst_phrases_Realismus_TOKENIZED = [
    vp
    for lst_novel in lst_lists_phrases_Realismus_TOKENIZED
    for vp in lst_novel
]

In [13]:
# novels -> one flat list of tokenized phrases (corpus order is kept)
lst_phrases_Naturalismus_TOKENIZED = [
    vp
    for lst_novel in lst_lists_phrases_Naturalismus_TOKENIZED
    for vp in lst_novel
]

In [14]:
# read in models

# embedding model queried (via get_vector) for the Romantik corpus
model_Romantik = KeyedVectors.load('../Analyseergebnisse/models/230116_model_Romantik.kv')

# one shared embedding model, used below for both the Realismus and the Naturalismus corpus
model_RealismusNaturalismus = KeyedVectors.load('../Analyseergebnisse/models/230116_model_RealismusNaturalismus.kv')

#### Approach 1 (SA, adjectives)

In [15]:
# define labels
# Seed adjectives for the high/low poles of arousal and valence; emo_value() reads
# these module-level lists. Some words belong to two lists on purpose
# (e.g. 'entzückt' is high arousal and high valence, 'zornig' is high arousal and low valence).
# NOTE: lst_high_valence and lst_low_valence are reassigned further down in the
# noun-based approach, so re-running adjective cells after that point uses the noun labels.

lst_high_arousal = ['zornig','nervös','aufgeregt','entzückt']

lst_high_valence = ['entzückt','glücklich','zufrieden','gelassen']

lst_low_arousal =  ['gelassen','ruhig','müde','überdrüssig']

lst_low_valence = ['überdrüssig','niedergeschlagen','bekümmert','zornig']

In [16]:
# define functions (see e.g. Jacobs et al. 2020)

def emo_value(word, model, aspect = "valence", labels_high = None, labels_low = None):
    """Score `word` on one affective dimension (adjective seed labels).

    The score is the mean cosine similarity between `word` and the "high" seed
    labels minus the mean cosine similarity between `word` and the "low" seed labels.

    Parameters
    ----------
    word : key passed to ``model.get_vector``.
    model : object exposing ``get_vector(key)`` (the notebook passes gensim KeyedVectors).
    aspect : ``"valence"`` or ``"arousal"``; selects which module-level seed lists are used.
    labels_high, labels_low : optional explicit seed lists. When omitted, the
        module-level lists ``lst_high_<aspect>`` / ``lst_low_<aspect>`` are read
        at call time.

    Returns
    -------
    float : mean similarity to the high labels minus mean similarity to the low labels.

    Raises
    ------
    ValueError : unknown `aspect` or an empty label list.
    Whatever ``model.get_vector`` raises for an unknown key also propagates.
    """
    if aspect not in ("valence", "arousal"):
        raise ValueError("aspect must be 'valence' or 'arousal', got %r" % (aspect,))

    # fall back to the module-level seed lists only when no explicit lists are given
    if labels_high is None:
        labels_high = lst_high_valence if aspect == "valence" else lst_high_arousal
    if labels_low is None:
        labels_low = lst_low_valence if aspect == "valence" else lst_low_arousal

    # the word vector and its norm are the same for every label: compute them once
    vec_word = np.asarray(model.get_vector(word), dtype=float)
    norm_word = np.linalg.norm(vec_word)

    def mean_similarity(labels):
        if len(labels) == 0:
            raise ValueError("label list must not be empty")
        sims = []
        for label in labels:
            vec_label = np.asarray(model.get_vector(label), dtype=float)
            denom = norm_word * np.linalg.norm(vec_label)
            # a zero vector has no direction; treat its similarity as 0
            sims.append(float(np.dot(vec_label, vec_word) / denom) if denom else 0.0)
        # divide by the actual number of labels instead of a hard-coded 4
        return sum(sims) / len(sims)

    return mean_similarity(labels_high) - mean_similarity(labels_low)

In [17]:
def emotional_potential(valence, arousal):
    """Return the absolute value of `valence` multiplied by `arousal`.

    The sign of the result follows the sign of `arousal`.
    """
    valence_magnitude = abs(valence)
    return valence_magnitude * arousal

In [18]:
# ROMANTIK

# create df

# empty frame, one row per phrase; the columns are filled by the cells below
lst_columns_Romantik_core = (
    ['phrase_tokenized', 'phrase_lemmatized']
    + ['mean_val_adj', 'mean_aro_adj', 'mean_ep_adj']
    + ['mean_val_noun', 'mean_aro_noun', 'mean_ep_noun']
    + ['mean_conf_dornseiff', 'mean_conf_annotation']
    + ['novel_title', 'novel_beg', 'novel_end', 'phrase_pos']
)
df_VPs_Romantik_core = pd.DataFrame(columns=lst_columns_Romantik_core)

In [19]:
# tokenized phrases; assigning the list to the empty frame also creates its rows (0..n-1)
df_VPs_Romantik_core['phrase_tokenized'] = lst_phrases_Romantik_core_TOKENIZED

In [20]:
# lemmatized phrases, stored row by row next to the tokenized ones
# (assumes both lists are index-aligned, i.e. entry i is the same phrase — TODO confirm)
df_VPs_Romantik_core['phrase_lemmatized'] = lst_phrases_Romantik_core_LEMMATIZED

In [21]:
df_VPs_Romantik_core.head()

Unnamed: 0,phrase_tokenized,phrase_lemmatized,mean_val_adj,mean_aro_adj,mean_ep_adj,mean_val_noun,mean_aro_noun,mean_ep_noun,mean_conf_dornseiff,mean_conf_annotation,novel_title,novel_beg,novel_end,phrase_pos
0,"[dem, schutzgeist, bleibt, ein, treuer, sinn, ...","[der, schutzgeist, bleiben, einen, treu, sinn,...",,,,,,,,,,,,
1,"[so, ward, auch, mir, ein, hochgesellig, leben...","[so, ward, auch, sich, einen, hochgesellig, le...",,,,,,,,,,,,
2,"[wo, sich, die, worte, leicht, zum, lied, gere...","[wo, sich, der, wort, leicht, zum, lied, reihen]",,,,,,,,,,,,
3,"[mein, lied, und, ich, wir, bleiben, treu, erg...","[mein, lied, und, ich, ich, bleiben, treu, erg...",,,,,,,,,,,,
4,"[der, uns, hat, durch, melodie, geweiht]","[der, sich, haben, durch, melodie, weihen]",,,,,,,,,,,,


In [22]:
df_VPs_Romantik_core.tail()

Unnamed: 0,phrase_tokenized,phrase_lemmatized,mean_val_adj,mean_aro_adj,mean_ep_adj,mean_val_noun,mean_aro_noun,mean_ep_noun,mean_conf_dornseiff,mean_conf_annotation,novel_title,novel_beg,novel_end,phrase_pos
250852,"[ , ruhig, modere, sein, gebein]","[ , ruhig, modere, mein, gebein]",,,,,,,,,,,,
250853,"[friede, sei, mit, seiner, seele]","[friede, sein, mit, sich, seele]",,,,,,,,,,,,
250854,"[die, erzählung, seiner, nachherigen, schicksa...","[der, erzählung, sich, nachherigen, schicksal,...",,,,,,,,,,,,
250855,"[dahin, muß, ich, hier, dieselben, verweisen, 2]","[dahin, muß, ich, hier, derselbe, verweisen, 2]",,,,,,,,,,,,
250856,"[eine, ausführlichere, erzählung, der, schicks...","[eine, ausführlich, erzählung, der, schicksal,...",,,,,,,,,,,,


In [23]:
# read in novel titles

# NOTE(review): machine-specific absolute path; move it to a configurable data directory.
dir_txt_Romantik_core = r"C:\Users\Julian\HESSENBOX-DA\Projekte\Konflikte\Daten\Romantik (abgeschlossen)\TXT\Kernkorpus"

# no os.getcwd() prefix: os.path.join discards earlier segments in front of an absolute path
lst_files_names = glob.glob(os.path.join(dir_txt_Romantik_core, "*.txt"))

# title = file name without directory and extension
# (avoids the unescaped "." of the former regex and a crash on a non-matching path)
# NOTE(review): the offsets computed below presumably rely on this glob order matching
# the novel order inside the pickled corpus — confirm.
lst_novels_titles = [os.path.splitext(os.path.basename(entry))[0] for entry in lst_files_names]

In [24]:
lst_novels_titles[:3]

['Arnim_Achim_von_Armut,_Reichtum,_Schuld_und_Busse_der_Graefin_Dolores',
 'Arnim_Achim_von_Die_Kronenwaechter_Erster_Band',
 'Arnim_Achim_von_Die_Kronenwaechter_Zweiter_Band']

In [25]:
# get novel lengths in VPs

# NOTE(review): the backslash-separated relative path only resolves on Windows.
lst_files_all = glob.glob(os.path.join(os.getcwd(), '..\\Analyseergebnisse\\pickled\\all', "*.pkl"))

# Map novel title -> its "<title>_phrases_lemmatized..." pickle in a single pass.
# Selecting by file name replaces the former "every 4th file" assumption; the first
# match per title is kept and non-matching files are skipped instead of crashing.
dict_files_lemmatized = {}
for path_file in lst_files_all:
    match_name = re.search(r"(?<=all\\)(.*)(?=_phrases_lemmatized)", path_file)
    if match_name is not None:
        dict_files_lemmatized.setdefault(match_name.group(1), path_file)

lst_novels_lens = []

beginning = 0

for title in lst_novels_titles:
    if title in dict_files_lemmatized:
        with open(dict_files_lemmatized[title], 'rb') as f:
            lst_phrases_novel = pickle.load(f)
    else:
        # keep the empty span for a missing pickle, but make the gap visible
        print(f"WARNING: no '_phrases_lemmatized' pickle found for {title!r}")
        lst_phrases_novel = []

    # (beginning, end) is a half-open span of row positions: end is exclusive
    end = beginning + len(lst_phrases_novel)
    lst_novels_lens.append((beginning, end))

    # the next novel starts where this one ends
    beginning = end

In [26]:
len(lst_novels_lens)

26

In [27]:
lst_novels_lens[:5]

[(0, 17603), (17603, 28562), (28562, 38464), (38464, 41231), (41231, 56319)]

In [28]:
# add new info to df

# One block assignment per novel instead of one .at write per row and column.
# A span that does not fit the frame now raises instead of silently appending rows.
for title, (beginning, end) in zip(lst_novels_titles, lst_novels_lens):
    if end == beginning:
        continue  # novel without phrases: nothing to label
    # .loc label slices include both endpoints, hence end-1
    df_VPs_Romantik_core.loc[beginning:end-1, 'novel_title'] = title
    df_VPs_Romantik_core.loc[beginning:end-1, 'novel_beg'] = beginning  # row of the first phrase (0-based)
    df_VPs_Romantik_core.loc[beginning:end-1, 'novel_end'] = end-1  # row of the last phrase; novel length is end - beginning
    df_VPs_Romantik_core.loc[beginning:end-1, 'phrase_pos'] = list(range(end - beginning))  # position within the novel (0-based)

In [29]:
df_VPs_Romantik_core.head()

Unnamed: 0,phrase_tokenized,phrase_lemmatized,mean_val_adj,mean_aro_adj,mean_ep_adj,mean_val_noun,mean_aro_noun,mean_ep_noun,mean_conf_dornseiff,mean_conf_annotation,novel_title,novel_beg,novel_end,phrase_pos
0,"[dem, schutzgeist, bleibt, ein, treuer, sinn, ...","[der, schutzgeist, bleiben, einen, treu, sinn,...",,,,,,,,,"Arnim_Achim_von_Armut,_Reichtum,_Schuld_und_Bu...",0,17602,0
1,"[so, ward, auch, mir, ein, hochgesellig, leben...","[so, ward, auch, sich, einen, hochgesellig, le...",,,,,,,,,"Arnim_Achim_von_Armut,_Reichtum,_Schuld_und_Bu...",0,17602,1
2,"[wo, sich, die, worte, leicht, zum, lied, gere...","[wo, sich, der, wort, leicht, zum, lied, reihen]",,,,,,,,,"Arnim_Achim_von_Armut,_Reichtum,_Schuld_und_Bu...",0,17602,2
3,"[mein, lied, und, ich, wir, bleiben, treu, erg...","[mein, lied, und, ich, ich, bleiben, treu, erg...",,,,,,,,,"Arnim_Achim_von_Armut,_Reichtum,_Schuld_und_Bu...",0,17602,3
4,"[der, uns, hat, durch, melodie, geweiht]","[der, sich, haben, durch, melodie, weihen]",,,,,,,,,"Arnim_Achim_von_Armut,_Reichtum,_Schuld_und_Bu...",0,17602,4


In [30]:
df_VPs_Romantik_core.tail()

Unnamed: 0,phrase_tokenized,phrase_lemmatized,mean_val_adj,mean_aro_adj,mean_ep_adj,mean_val_noun,mean_aro_noun,mean_ep_noun,mean_conf_dornseiff,mean_conf_annotation,novel_title,novel_beg,novel_end,phrase_pos
250852,"[ , ruhig, modere, sein, gebein]","[ , ruhig, modere, mein, gebein]",,,,,,,,,"Vulpius_Christian_August_Rinaldo_Rinaldini,_de...",231861,250856,18991
250853,"[friede, sei, mit, seiner, seele]","[friede, sein, mit, sich, seele]",,,,,,,,,"Vulpius_Christian_August_Rinaldo_Rinaldini,_de...",231861,250856,18992
250854,"[die, erzählung, seiner, nachherigen, schicksa...","[der, erzählung, sich, nachherigen, schicksal,...",,,,,,,,,"Vulpius_Christian_August_Rinaldo_Rinaldini,_de...",231861,250856,18993
250855,"[dahin, muß, ich, hier, dieselben, verweisen, 2]","[dahin, muß, ich, hier, derselbe, verweisen, 2]",,,,,,,,,"Vulpius_Christian_August_Rinaldo_Rinaldini,_de...",231861,250856,18994
250856,"[eine, ausführlichere, erzählung, der, schicks...","[eine, ausführlich, erzählung, der, schicksal,...",,,,,,,,,"Vulpius_Christian_August_Rinaldo_Rinaldini,_de...",231861,250856,18995


In [31]:
#determine values for all types

# word -> (valence, arousal, emotional potential) for every distinct lemma
dict_profiles = {}
lst_valence = []
lst_arousal = []
lst_potential = []

for word in set(lst_words_Romantik_core_LEMMATIZED):
    try:
        valence = emo_value(word, model_Romantik, "valence")
        arousal = emo_value(word, model_Romantik, "arousal")
    except KeyError:
        # word (or a seed label) is not in the embedding model: skip it.
        # Only KeyError is caught so that other errors are no longer hidden.
        continue
    potential = emotional_potential(valence, arousal)
    # append only after both scores exist, so the three lists stay aligned
    lst_valence.append(valence)
    lst_arousal.append(arousal)
    lst_potential.append(potential)
    dict_profiles[word] = (valence, arousal, potential)

In [32]:
# SA for all VPs

lst_mean_val = []
lst_mean_aro = []
lst_mean_ep = []

for phrase in lst_phrases_Romantik_core_LEMMATIZED:

    # (valence, arousal, potential) of the scored words; words without a profile are left out
    lst_profiles_phrase = [dict_profiles[word] for word in phrase if word in dict_profiles]

    if lst_profiles_phrase:
        n_scored = len(lst_profiles_phrase)
        lst_mean_val.append(sum(profile[0] for profile in lst_profiles_phrase) / n_scored)
        lst_mean_aro.append(sum(profile[1] for profile in lst_profiles_phrase) / n_scored)
        lst_mean_ep.append(sum(profile[2] for profile in lst_profiles_phrase) / n_scored)
    else:
        # empty phrase or no scored word: the mean is undefined
        lst_mean_val.append(np.nan)
        lst_mean_aro.append(np.nan)
        lst_mean_ep.append(np.nan)

# one write per column; the Series index 0..n-1 lines up with the frame's row labels
df_VPs_Romantik_core['mean_val_adj'] = pd.Series(lst_mean_val, dtype=float)
df_VPs_Romantik_core['mean_aro_adj'] = pd.Series(lst_mean_aro, dtype=float)
df_VPs_Romantik_core['mean_ep_adj'] = pd.Series(lst_mean_ep, dtype=float)

In [33]:
df_VPs_Romantik_core.head()

Unnamed: 0,phrase_tokenized,phrase_lemmatized,mean_val_adj,mean_aro_adj,mean_ep_adj,mean_val_noun,mean_aro_noun,mean_ep_noun,mean_conf_dornseiff,mean_conf_annotation,novel_title,novel_beg,novel_end,phrase_pos
0,"[dem, schutzgeist, bleibt, ein, treuer, sinn, ...","[der, schutzgeist, bleiben, einen, treu, sinn,...",-0.008388,0.007577,-0.000145,,,,,,"Arnim_Achim_von_Armut,_Reichtum,_Schuld_und_Bu...",0,17602,0
1,"[so, ward, auch, mir, ein, hochgesellig, leben...","[so, ward, auch, sich, einen, hochgesellig, le...",0.000133,-0.000132,-0.000119,,,,,,"Arnim_Achim_von_Armut,_Reichtum,_Schuld_und_Bu...",0,17602,1
2,"[wo, sich, die, worte, leicht, zum, lied, gere...","[wo, sich, der, wort, leicht, zum, lied, reihen]",-0.000316,-0.002302,-4.8e-05,,,,,,"Arnim_Achim_von_Armut,_Reichtum,_Schuld_und_Bu...",0,17602,2
3,"[mein, lied, und, ich, wir, bleiben, treu, erg...","[mein, lied, und, ich, ich, bleiben, treu, erg...",-0.002559,-0.011589,-0.000559,,,,,,"Arnim_Achim_von_Armut,_Reichtum,_Schuld_und_Bu...",0,17602,3
4,"[der, uns, hat, durch, melodie, geweiht]","[der, sich, haben, durch, melodie, weihen]",0.021854,0.004098,-0.000289,,,,,,"Arnim_Achim_von_Armut,_Reichtum,_Schuld_und_Bu...",0,17602,4


In [34]:
# REALISMUS

# create df

# empty frame, one row per phrase; the columns are filled by the cells below
lst_columns_Realismus = (
    ['phrase_tokenized', 'phrase_lemmatized']
    + ['mean_val_adj', 'mean_aro_adj', 'mean_ep_adj']
    + ['mean_val_noun', 'mean_aro_noun', 'mean_ep_noun']
    + ['mean_conf_dornseiff', 'mean_conf_annotation']
    + ['novel_title', 'novel_beg', 'novel_end', 'phrase_pos']
)
df_VPs_Realismus = pd.DataFrame(columns=lst_columns_Realismus)

In [35]:
# tokenized phrases; assigning the list to the empty frame also creates its rows (0..n-1)
df_VPs_Realismus['phrase_tokenized'] = lst_phrases_Realismus_TOKENIZED

# lemmatized phrases (assumed index-aligned with the tokenized list — TODO confirm)
df_VPs_Realismus['phrase_lemmatized'] = lst_phrases_Realismus_LEMMATIZED

In [36]:
# read in novel titles

# NOTE(review): machine-specific absolute path; move it to a configurable data directory.
dir_txt_Realismus = r"C:\Users\Public\Data\Korpuserstellung\Realismus"

# no os.getcwd() prefix: os.path.join discards earlier segments in front of an absolute path
lst_files_names = glob.glob(os.path.join(dir_txt_Realismus, "*.txt"))

# title = file name without directory and extension
# (avoids the unescaped "." of the former regex and a crash on a non-matching path)
# NOTE(review): the offsets computed below presumably rely on this glob order matching
# the novel order inside the pickled corpus — confirm.
lst_novels_titles = [os.path.splitext(os.path.basename(entry))[0] for entry in lst_files_names]

In [37]:
# get novel lengths in VPs

# NOTE(review): the backslash-separated relative path only resolves on Windows.
lst_files_all = glob.glob(os.path.join(os.getcwd(), '..\\Analyseergebnisse\\pickled\\all', "*.pkl"))

# Map novel title -> its "<title>_phrases_lemmatized..." pickle in a single pass.
# Selecting by file name replaces the former "every 4th file" assumption; the first
# match per title is kept and non-matching files are skipped instead of crashing.
dict_files_lemmatized = {}
for path_file in lst_files_all:
    match_name = re.search(r"(?<=all\\)(.*)(?=_phrases_lemmatized)", path_file)
    if match_name is not None:
        dict_files_lemmatized.setdefault(match_name.group(1), path_file)

lst_novels_lens = []

beginning = 0

for title in lst_novels_titles:
    if title in dict_files_lemmatized:
        with open(dict_files_lemmatized[title], 'rb') as f:
            lst_phrases_novel = pickle.load(f)
    else:
        # keep the empty span for a missing pickle, but make the gap visible
        print(f"WARNING: no '_phrases_lemmatized' pickle found for {title!r}")
        lst_phrases_novel = []

    # (beginning, end) is a half-open span of row positions: end is exclusive
    end = beginning + len(lst_phrases_novel)
    lst_novels_lens.append((beginning, end))

    # the next novel starts where this one ends
    beginning = end

In [38]:
# add new info to df

# One block assignment per novel instead of one .at write per row and column.
# A span that does not fit the frame now raises instead of silently appending rows.
for title, (beginning, end) in zip(lst_novels_titles, lst_novels_lens):
    if end == beginning:
        continue  # novel without phrases: nothing to label
    # .loc label slices include both endpoints, hence end-1
    df_VPs_Realismus.loc[beginning:end-1, 'novel_title'] = title
    df_VPs_Realismus.loc[beginning:end-1, 'novel_beg'] = beginning  # row of the first phrase (0-based)
    df_VPs_Realismus.loc[beginning:end-1, 'novel_end'] = end-1  # row of the last phrase; novel length is end - beginning
    df_VPs_Realismus.loc[beginning:end-1, 'phrase_pos'] = list(range(end - beginning))  # position within the novel (0-based)

In [39]:
#determine values for all types

# word -> (valence, arousal, emotional potential) for every distinct lemma
dict_profiles = {}
lst_valence = []
lst_arousal = []
lst_potential = []

for word in set(lst_words_Realismus_LEMMATIZED):
    try:
        valence = emo_value(word, model_RealismusNaturalismus, "valence")
        arousal = emo_value(word, model_RealismusNaturalismus, "arousal")
    except KeyError:
        # word (or a seed label) is not in the embedding model: skip it.
        # Only KeyError is caught so that other errors are no longer hidden.
        continue
    potential = emotional_potential(valence, arousal)
    # append only after both scores exist, so the three lists stay aligned
    lst_valence.append(valence)
    lst_arousal.append(arousal)
    lst_potential.append(potential)
    dict_profiles[word] = (valence, arousal, potential)

In [40]:
# SA for all VPs

lst_mean_val = []
lst_mean_aro = []
lst_mean_ep = []

for phrase in lst_phrases_Realismus_LEMMATIZED:

    # (valence, arousal, potential) of the scored words; words without a profile are left out
    lst_profiles_phrase = [dict_profiles[word] for word in phrase if word in dict_profiles]

    if lst_profiles_phrase:
        n_scored = len(lst_profiles_phrase)
        lst_mean_val.append(sum(profile[0] for profile in lst_profiles_phrase) / n_scored)
        lst_mean_aro.append(sum(profile[1] for profile in lst_profiles_phrase) / n_scored)
        lst_mean_ep.append(sum(profile[2] for profile in lst_profiles_phrase) / n_scored)
    else:
        # empty phrase or no scored word: the mean is undefined
        lst_mean_val.append(np.nan)
        lst_mean_aro.append(np.nan)
        lst_mean_ep.append(np.nan)

# one write per column; the Series index 0..n-1 lines up with the frame's row labels
df_VPs_Realismus['mean_val_adj'] = pd.Series(lst_mean_val, dtype=float)
df_VPs_Realismus['mean_aro_adj'] = pd.Series(lst_mean_aro, dtype=float)
df_VPs_Realismus['mean_ep_adj'] = pd.Series(lst_mean_ep, dtype=float)

In [41]:
df_VPs_Realismus.head()

Unnamed: 0,phrase_tokenized,phrase_lemmatized,mean_val_adj,mean_aro_adj,mean_ep_adj,mean_val_noun,mean_aro_noun,mean_ep_noun,mean_conf_dornseiff,mean_conf_annotation,novel_title,novel_beg,novel_end,phrase_pos
0,"[es, klopfte, hart, und, kurz, an, die, tür]","[ich, klopfen, hart, und, kurz, an, der, tür]",-0.051287,-0.029731,-0.001594,,,,,,Boy-Ed_Ida_Empor,0,6496,0
1,"[obgleich, irene, seit, vielen, stunden, bald,...","[obgleich, irene, seit, viel, stunde, bald, wa...",-0.023664,-0.023765,-0.000829,,,,,,Boy-Ed_Ida_Empor,0,6496,1
2,"[fuhr, sie, nun, doch, erschreckt, zusammen]","[fahren, ich, nun, doch, erschrecken, zusammen]",-0.043396,-0.008009,0.001012,,,,,,Boy-Ed_Ida_Empor,0,6496,2
3,"[die, ganze, nacht, hatte, sie, keinen, rechte...","[der, ganze, nacht, haben, ich, kein, recht, s...",-0.027959,-0.033338,-0.001239,,,,,,Boy-Ed_Ida_Empor,0,6496,3
4,"[und, nun, schien, es, ihr, als, wären, ihre, ...","[und, nun, scheinen, ich, mein, als, sein, mei...",-0.037474,-0.032536,-0.001683,,,,,,Boy-Ed_Ida_Empor,0,6496,4


In [42]:
# NATURALISMUS

# create df

# empty frame, one row per phrase; the columns are filled by the cells below
lst_columns_Naturalismus = (
    ['phrase_tokenized', 'phrase_lemmatized']
    + ['mean_val_adj', 'mean_aro_adj', 'mean_ep_adj']
    + ['mean_val_noun', 'mean_aro_noun', 'mean_ep_noun']
    + ['mean_conf_dornseiff', 'mean_conf_annotation']
    + ['novel_title', 'novel_beg', 'novel_end', 'phrase_pos']
)
df_VPs_Naturalismus = pd.DataFrame(columns=lst_columns_Naturalismus)

In [43]:
# tokenized phrases; assigning the list to the empty frame also creates its rows (0..n-1)
df_VPs_Naturalismus['phrase_tokenized'] = lst_phrases_Naturalismus_TOKENIZED

# lemmatized phrases (assumed index-aligned with the tokenized list — TODO confirm)
df_VPs_Naturalismus['phrase_lemmatized'] = lst_phrases_Naturalismus_LEMMATIZED

In [44]:
# read in novel titles

# NOTE(review): machine-specific absolute path; move it to a configurable data directory.
dir_txt_Naturalismus = r"C:\Users\Public\Data\Korpuserstellung\Naturalismus"

# no os.getcwd() prefix: os.path.join discards earlier segments in front of an absolute path
lst_files_names = glob.glob(os.path.join(dir_txt_Naturalismus, "*.txt"))

# title = file name without directory and extension
# (avoids the unescaped "." of the former regex and a crash on a non-matching path)
# NOTE(review): the offsets computed below presumably rely on this glob order matching
# the novel order inside the pickled corpus — confirm.
lst_novels_titles = [os.path.splitext(os.path.basename(entry))[0] for entry in lst_files_names]

In [45]:
# get novel lengths in VPs

# NOTE(review): the backslash-separated relative path only resolves on Windows.
lst_files_all = glob.glob(os.path.join(os.getcwd(), '..\\Analyseergebnisse\\pickled\\all', "*.pkl"))

# Map novel title -> its "<title>_phrases_lemmatized..." pickle in a single pass.
# Selecting by file name replaces the former "every 4th file" assumption; the first
# match per title is kept and non-matching files are skipped instead of crashing.
dict_files_lemmatized = {}
for path_file in lst_files_all:
    match_name = re.search(r"(?<=all\\)(.*)(?=_phrases_lemmatized)", path_file)
    if match_name is not None:
        dict_files_lemmatized.setdefault(match_name.group(1), path_file)

lst_novels_lens = []

beginning = 0

for title in lst_novels_titles:
    if title in dict_files_lemmatized:
        with open(dict_files_lemmatized[title], 'rb') as f:
            lst_phrases_novel = pickle.load(f)
    else:
        # keep the empty span for a missing pickle, but make the gap visible
        print(f"WARNING: no '_phrases_lemmatized' pickle found for {title!r}")
        lst_phrases_novel = []

    # (beginning, end) is a half-open span of row positions: end is exclusive
    end = beginning + len(lst_phrases_novel)
    lst_novels_lens.append((beginning, end))

    # the next novel starts where this one ends
    beginning = end

In [46]:
# add new info to df

# One block assignment per novel instead of one .at write per row and column.
# A span that does not fit the frame now raises instead of silently appending rows.
for title, (beginning, end) in zip(lst_novels_titles, lst_novels_lens):
    if end == beginning:
        continue  # novel without phrases: nothing to label
    # .loc label slices include both endpoints, hence end-1
    df_VPs_Naturalismus.loc[beginning:end-1, 'novel_title'] = title
    df_VPs_Naturalismus.loc[beginning:end-1, 'novel_beg'] = beginning  # row of the first phrase (0-based)
    df_VPs_Naturalismus.loc[beginning:end-1, 'novel_end'] = end-1  # row of the last phrase; novel length is end - beginning
    df_VPs_Naturalismus.loc[beginning:end-1, 'phrase_pos'] = list(range(end - beginning))  # position within the novel (0-based)

In [47]:
#determine values for all types

# word -> (valence, arousal, emotional potential) for every distinct lemma
dict_profiles = {}
lst_valence = []
lst_arousal = []
lst_potential = []

for word in set(lst_words_Naturalismus_LEMMATIZED):
    try:
        valence = emo_value(word, model_RealismusNaturalismus, "valence")
        arousal = emo_value(word, model_RealismusNaturalismus, "arousal")
    except KeyError:
        # word (or a seed label) is not in the embedding model: skip it.
        # Only KeyError is caught so that other errors are no longer hidden.
        continue
    potential = emotional_potential(valence, arousal)
    # append only after both scores exist, so the three lists stay aligned
    lst_valence.append(valence)
    lst_arousal.append(arousal)
    lst_potential.append(potential)
    dict_profiles[word] = (valence, arousal, potential)

In [48]:
# SA for all VPs

lst_mean_val = []
lst_mean_aro = []
lst_mean_ep = []

for phrase in lst_phrases_Naturalismus_LEMMATIZED:

    # (valence, arousal, potential) of the scored words; words without a profile are left out
    lst_profiles_phrase = [dict_profiles[word] for word in phrase if word in dict_profiles]

    if lst_profiles_phrase:
        n_scored = len(lst_profiles_phrase)
        lst_mean_val.append(sum(profile[0] for profile in lst_profiles_phrase) / n_scored)
        lst_mean_aro.append(sum(profile[1] for profile in lst_profiles_phrase) / n_scored)
        lst_mean_ep.append(sum(profile[2] for profile in lst_profiles_phrase) / n_scored)
    else:
        # empty phrase or no scored word: the mean is undefined
        lst_mean_val.append(np.nan)
        lst_mean_aro.append(np.nan)
        lst_mean_ep.append(np.nan)

# one write per column; the Series index 0..n-1 lines up with the frame's row labels
df_VPs_Naturalismus['mean_val_adj'] = pd.Series(lst_mean_val, dtype=float)
df_VPs_Naturalismus['mean_aro_adj'] = pd.Series(lst_mean_aro, dtype=float)
df_VPs_Naturalismus['mean_ep_adj'] = pd.Series(lst_mean_ep, dtype=float)

In [49]:
df_VPs_Naturalismus.head()

Unnamed: 0,phrase_tokenized,phrase_lemmatized,mean_val_adj,mean_aro_adj,mean_ep_adj,mean_val_noun,mean_aro_noun,mean_ep_noun,mean_conf_dornseiff,mean_conf_annotation,novel_title,novel_beg,novel_end,phrase_pos
0,"[in, der, morgenstille, war, nichts, vernehmba...","[in, der, morgenstille, sein, nichts, vernehmb...",-0.041573,-0.009479,-0.000255,,,,,,Andreas-Salome_Lou_Ruth,0,8249,0
1,"[die, sich, nicht, weit, von, der, russischen,...","[der, sich, nicht, weit, von, der, russisch, h...",-0.017482,-0.012251,-0.000511,,,,,,Andreas-Salome_Lou_Ruth,0,8249,1
2,"[die, breite, ungepflasterte, straße, die, sic...","[der, breit, ungepflasterte, straße, der, sich...",-0.023813,-0.023564,-0.00089,,,,,,Andreas-Salome_Lou_Ruth,0,8249,2
3,"[dann, holperte, ein, leiterwagen, mit, einige...","[dann, holpern, einen, leiterwagen, mit, einig...",-0.05482,-0.040984,-0.00252,,,,,,Andreas-Salome_Lou_Ruth,0,8249,3
4,"[der, fuhrmann, kletterte, von, seinem, sitz, ...","[der, fuhrmann, klettern, von, mein, sitz, wer...",-0.04453,-0.022021,-0.001098,,,,,,Andreas-Salome_Lou_Ruth,0,8249,4


#### Approach 1 (SA, nouns)

In [50]:
# define labels
# Seed nouns for the noun-based approach.
# NOTE: lst_high_valence and lst_low_valence overwrite the adjective lists defined
# earlier in the notebook; from here on every cell that reads them sees the nouns.
# The valence lists have different lengths (7 high, 5 low); arousal uses a single
# list of 14 nouns (lst_arousal_total) instead of separate high/low lists.

lst_high_valence = ['behagen', 'glück', 'freude', 'stolz', 'hilfe', 'befriedigung', 'erstaunen']

lst_low_valence = ['ekel', 'verlegenheit', 'sorge', 'traurigkeit', 'schande']

lst_arousal_total = ['vergnügen', 'zorn', 'verachtung', 'behagen', 'ekel', 'verlegenheit', 'erregung', 'sorge', 'glück', 'interesse', 'freude', 'hilfe', 'traurigkeit', 'befriedigung']

In [54]:
# update function
# NOTE: this redefinition replaces the adjective-based emo_value defined earlier in the notebook.

def emo_value(word, model, aspect = "valence", labels_high = None, labels_low = None, labels_arousal = None):
    """Score `word` on one affective dimension (noun seed labels).

    valence: mean cosine similarity to the high-valence labels minus mean cosine
             similarity to the low-valence labels.
    arousal: mean cosine similarity to all arousal labels (no high/low contrast).

    Parameters
    ----------
    word : key passed to ``model.get_vector``.
    model : object exposing ``get_vector(key)`` (the notebook passes gensim KeyedVectors).
    aspect : ``"valence"`` or ``"arousal"``.
    labels_high, labels_low, labels_arousal : optional explicit seed lists. When
        omitted, the module-level ``lst_high_valence``, ``lst_low_valence`` and
        ``lst_arousal_total`` are read at call time.

    Returns
    -------
    float : the score described above.

    Raises
    ------
    ValueError : unknown `aspect` or an empty label list.
    Whatever ``model.get_vector`` raises for an unknown key also propagates.
    """
    if aspect not in ("valence", "arousal"):
        raise ValueError("aspect must be 'valence' or 'arousal', got %r" % (aspect,))

    # the word vector and its norm are the same for every label: compute them once
    vec_word = np.asarray(model.get_vector(word), dtype=float)
    norm_word = np.linalg.norm(vec_word)

    def mean_similarity(labels):
        if len(labels) == 0:
            raise ValueError("label list must not be empty")
        sims = []
        for label in labels:
            vec_label = np.asarray(model.get_vector(label), dtype=float)
            denom = norm_word * np.linalg.norm(vec_label)
            # a zero vector has no direction; treat its similarity as 0
            sims.append(float(np.dot(vec_label, vec_word) / denom) if denom else 0.0)
        return sum(sims) / len(sims)

    if aspect == "valence":
        if labels_high is None:
            labels_high = lst_high_valence
        if labels_low is None:
            labels_low = lst_low_valence
        return mean_similarity(labels_high) - mean_similarity(labels_low)

    if labels_arousal is None:
        labels_arousal = lst_arousal_total
    return mean_similarity(labels_arousal)

In [55]:
# ROMANTIK

#determine values for all types

# word -> (valence, arousal, emotional potential) for every distinct lemma
dict_profiles = {}
lst_valence = []
lst_arousal = []
lst_potential = []

for word in set(lst_words_Romantik_core_LEMMATIZED):
    try:
        valence = emo_value(word, model_Romantik, "valence")
        arousal = emo_value(word, model_Romantik, "arousal")
    except KeyError:
        # word (or a seed label) is not in the embedding model: skip it.
        # Only KeyError is caught so that other errors are no longer hidden.
        continue
    potential = emotional_potential(valence, arousal)
    # append only after both scores exist, so the three lists stay aligned
    lst_valence.append(valence)
    lst_arousal.append(arousal)
    lst_potential.append(potential)
    dict_profiles[word] = (valence, arousal, potential)

In [59]:
# SA for all VPs

lst_mean_val = []
lst_mean_aro = []
lst_mean_ep = []

for phrase in lst_phrases_Romantik_core_LEMMATIZED:

    # (valence, arousal, potential) of the scored words; words without a profile are left out
    lst_profiles_phrase = [dict_profiles[word] for word in phrase if word in dict_profiles]

    if lst_profiles_phrase:
        n_scored = len(lst_profiles_phrase)
        lst_mean_val.append(sum(profile[0] for profile in lst_profiles_phrase) / n_scored)
        lst_mean_aro.append(sum(profile[1] for profile in lst_profiles_phrase) / n_scored)
        lst_mean_ep.append(sum(profile[2] for profile in lst_profiles_phrase) / n_scored)
    else:
        # empty phrase or no scored word: the mean is undefined
        lst_mean_val.append(np.nan)
        lst_mean_aro.append(np.nan)
        lst_mean_ep.append(np.nan)

# one write per column; the Series index 0..n-1 lines up with the frame's row labels
df_VPs_Romantik_core['mean_val_noun'] = pd.Series(lst_mean_val, dtype=float)
df_VPs_Romantik_core['mean_aro_noun'] = pd.Series(lst_mean_aro, dtype=float)
df_VPs_Romantik_core['mean_ep_noun'] = pd.Series(lst_mean_ep, dtype=float)

In [60]:
df_VPs_Romantik_core.head()

Unnamed: 0,phrase_tokenized,phrase_lemmatized,mean_val_adj,mean_aro_adj,mean_ep_adj,mean_val_noun,mean_aro_noun,mean_ep_noun,mean_conf_dornseiff,mean_conf_annotation,novel_title,novel_beg,novel_end,phrase_pos
0,"[dem, schutzgeist, bleibt, ein, treuer, sinn, ...","[der, schutzgeist, bleiben, einen, treu, sinn,...",-0.008388,0.007577,-0.000145,0.007036,0.224101,0.006323,,,"Arnim_Achim_von_Armut,_Reichtum,_Schuld_und_Bu...",0,17602,0
1,"[so, ward, auch, mir, ein, hochgesellig, leben...","[so, ward, auch, sich, einen, hochgesellig, le...",0.000133,-0.000132,-0.000119,0.007223,0.257948,0.005676,,,"Arnim_Achim_von_Armut,_Reichtum,_Schuld_und_Bu...",0,17602,1
2,"[wo, sich, die, worte, leicht, zum, lied, gere...","[wo, sich, der, wort, leicht, zum, lied, reihen]",-0.000316,-0.002302,-4.8e-05,0.009215,0.211023,0.005506,,,"Arnim_Achim_von_Armut,_Reichtum,_Schuld_und_Bu...",0,17602,2
3,"[mein, lied, und, ich, wir, bleiben, treu, erg...","[mein, lied, und, ich, ich, bleiben, treu, erg...",-0.002559,-0.011589,-0.000559,0.013513,0.267624,0.006237,,,"Arnim_Achim_von_Armut,_Reichtum,_Schuld_und_Bu...",0,17602,3
4,"[der, uns, hat, durch, melodie, geweiht]","[der, sich, haben, durch, melodie, weihen]",0.021854,0.004098,-0.000289,-0.004865,0.249832,0.006091,,,"Arnim_Achim_von_Armut,_Reichtum,_Schuld_und_Bu...",0,17602,4


In [61]:
# REALISMUS

#determine values for all types

# word -> (valence, arousal, emotional potential) for every distinct lemma
dict_profiles = {}
lst_valence = []
lst_arousal = []
lst_potential = []

for word in set(lst_words_Realismus_LEMMATIZED):
    try:
        valence = emo_value(word, model_RealismusNaturalismus, "valence")
        arousal = emo_value(word, model_RealismusNaturalismus, "arousal")
    except KeyError:
        # word (or a seed label) is not in the embedding model: skip it.
        # Only KeyError is caught so that other errors are no longer hidden.
        continue
    potential = emotional_potential(valence, arousal)
    # append only after both scores exist, so the three lists stay aligned
    lst_valence.append(valence)
    lst_arousal.append(arousal)
    lst_potential.append(potential)
    dict_profiles[word] = (valence, arousal, potential)

In [62]:
# SA for all VPs

lst_mean_val = []
lst_mean_aro = []
lst_mean_ep = []

for phrase in lst_phrases_Realismus_LEMMATIZED:

    # (valence, arousal, potential) of the scored words; words without a profile are left out
    lst_profiles_phrase = [dict_profiles[word] for word in phrase if word in dict_profiles]

    if lst_profiles_phrase:
        n_scored = len(lst_profiles_phrase)
        lst_mean_val.append(sum(profile[0] for profile in lst_profiles_phrase) / n_scored)
        lst_mean_aro.append(sum(profile[1] for profile in lst_profiles_phrase) / n_scored)
        lst_mean_ep.append(sum(profile[2] for profile in lst_profiles_phrase) / n_scored)
    else:
        # empty phrase or no scored word: the mean is undefined
        lst_mean_val.append(np.nan)
        lst_mean_aro.append(np.nan)
        lst_mean_ep.append(np.nan)

# one write per column; the Series index 0..n-1 lines up with the frame's row labels
df_VPs_Realismus['mean_val_noun'] = pd.Series(lst_mean_val, dtype=float)
df_VPs_Realismus['mean_aro_noun'] = pd.Series(lst_mean_aro, dtype=float)
df_VPs_Realismus['mean_ep_noun'] = pd.Series(lst_mean_ep, dtype=float)

In [63]:
df_VPs_Realismus.head()

Unnamed: 0,phrase_tokenized,phrase_lemmatized,mean_val_adj,mean_aro_adj,mean_ep_adj,mean_val_noun,mean_aro_noun,mean_ep_noun,mean_conf_dornseiff,mean_conf_annotation,novel_title,novel_beg,novel_end,phrase_pos
0,"[es, klopfte, hart, und, kurz, an, die, tür]","[ich, klopfen, hart, und, kurz, an, der, tür]",-0.051287,-0.029731,-0.001594,-0.014026,0.239272,0.00693,,,Boy-Ed_Ida_Empor,0,6496,0
1,"[obgleich, irene, seit, vielen, stunden, bald,...","[obgleich, irene, seit, viel, stunde, bald, wa...",-0.023664,-0.023765,-0.000829,-0.028803,0.25432,0.008266,,,Boy-Ed_Ida_Empor,0,6496,1
2,"[fuhr, sie, nun, doch, erschreckt, zusammen]","[fahren, ich, nun, doch, erschrecken, zusammen]",-0.043396,-0.008009,0.001012,-0.033831,0.252944,0.009461,,,Boy-Ed_Ida_Empor,0,6496,2
3,"[die, ganze, nacht, hatte, sie, keinen, rechte...","[der, ganze, nacht, haben, ich, kein, recht, s...",-0.027959,-0.033338,-0.001239,-0.035174,0.273068,0.011571,,,Boy-Ed_Ida_Empor,0,6496,3
4,"[und, nun, schien, es, ihr, als, wären, ihre, ...","[und, nun, scheinen, ich, mein, als, sein, mei...",-0.037474,-0.032536,-0.001683,-0.044963,0.29948,0.015282,,,Boy-Ed_Ida_Empor,0,6496,4


In [64]:
# NATURALISMUS

#determine values for all types

# word -> (valence, arousal, emotional potential) for every distinct lemma
dict_profiles = {}
lst_valence = []
lst_arousal = []
lst_potential = []

for word in set(lst_words_Naturalismus_LEMMATIZED):
    try:
        valence = emo_value(word, model_RealismusNaturalismus, "valence")
        arousal = emo_value(word, model_RealismusNaturalismus, "arousal")
    except KeyError:
        # word (or a seed label) is not in the embedding model: skip it.
        # Only KeyError is caught so that other errors are no longer hidden.
        continue
    potential = emotional_potential(valence, arousal)
    # append only after both scores exist, so the three lists stay aligned
    lst_valence.append(valence)
    lst_arousal.append(arousal)
    lst_potential.append(potential)
    dict_profiles[word] = (valence, arousal, potential)

In [65]:
# SA for all VPs

lst_mean_val = []
lst_mean_aro = []
lst_mean_ep = []

for phrase in lst_phrases_Naturalismus_LEMMATIZED:

    # (valence, arousal, potential) of the scored words; words without a profile are left out
    lst_profiles_phrase = [dict_profiles[word] for word in phrase if word in dict_profiles]

    if lst_profiles_phrase:
        n_scored = len(lst_profiles_phrase)
        lst_mean_val.append(sum(profile[0] for profile in lst_profiles_phrase) / n_scored)
        lst_mean_aro.append(sum(profile[1] for profile in lst_profiles_phrase) / n_scored)
        lst_mean_ep.append(sum(profile[2] for profile in lst_profiles_phrase) / n_scored)
    else:
        # empty phrase or no scored word: the mean is undefined
        lst_mean_val.append(np.nan)
        lst_mean_aro.append(np.nan)
        lst_mean_ep.append(np.nan)

# one write per column; the Series index 0..n-1 lines up with the frame's row labels
df_VPs_Naturalismus['mean_val_noun'] = pd.Series(lst_mean_val, dtype=float)
df_VPs_Naturalismus['mean_aro_noun'] = pd.Series(lst_mean_aro, dtype=float)
df_VPs_Naturalismus['mean_ep_noun'] = pd.Series(lst_mean_ep, dtype=float)

In [66]:
df_VPs_Naturalismus.head()

Unnamed: 0,phrase_tokenized,phrase_lemmatized,mean_val_adj,mean_aro_adj,mean_ep_adj,mean_val_noun,mean_aro_noun,mean_ep_noun,mean_conf_dornseiff,mean_conf_annotation,novel_title,novel_beg,novel_end,phrase_pos
0,"[in, der, morgenstille, war, nichts, vernehmba...","[in, der, morgenstille, sein, nichts, vernehmb...",-0.041573,-0.009479,-0.000255,-0.018016,0.236784,0.005706,,,Andreas-Salome_Lou_Ruth,0,8249,0
1,"[die, sich, nicht, weit, von, der, russischen,...","[der, sich, nicht, weit, von, der, russisch, h...",-0.017482,-0.012251,-0.000511,-0.002285,0.230448,0.0063,,,Andreas-Salome_Lou_Ruth,0,8249,1
2,"[die, breite, ungepflasterte, straße, die, sic...","[der, breit, ungepflasterte, straße, der, sich...",-0.023813,-0.023564,-0.00089,-0.005926,0.223625,0.005873,,,Andreas-Salome_Lou_Ruth,0,8249,2
3,"[dann, holperte, ein, leiterwagen, mit, einige...","[dann, holpern, einen, leiterwagen, mit, einig...",-0.05482,-0.040984,-0.00252,-0.011334,0.213727,0.004154,,,Andreas-Salome_Lou_Ruth,0,8249,3
4,"[der, fuhrmann, kletterte, von, seinem, sitz, ...","[der, fuhrmann, klettern, von, mein, sitz, wer...",-0.04453,-0.022021,-0.001098,-0.015741,0.23927,0.005927,,,Andreas-Salome_Lou_Ruth,0,8249,4


#### Approach 2 (Dornseiff)

In [67]:
# define labels
# Seed words for the conflict score computed by conf_value():
# 12 high-conflict and 6 low-conflict labels; conf_value() averages each list
# separately, so the different list lengths do not bias the difference.

lst_high_conflict = ['töten',
 'unglück',
 'gefahr',
 'furcht',
 'schrecken',
 'streit',
 'kampf',
 'quälen',
 'rache',
 'gewalt',
 'waffe',
 'schutz']

lst_low_conflict = ['glück', 'leicht', 'lust', 'bewundern', 'schönheit', 'friede']


In [68]:
# update function

def conf_value(word, model, high_labels=None, low_labels=None):
    """Return the conflict value of ``word``.

    The value is the mean cosine similarity between the word's vector and the
    high-conflict label vectors minus the mean cosine similarity between the
    word's vector and the low-conflict label vectors.

    Parameters
    ----------
    word : str
        Key whose vector is requested from ``model``.
    model
        Any object exposing ``get_vector(key)``. The notebook imports gensim's
        ``KeyedVectors``; presumably that is what callers pass -- verify.
    high_labels, low_labels : sequence of str, optional
        Label words of the two poles. When omitted, the module-level
        ``lst_high_conflict`` / ``lst_low_conflict`` are read at call time
        (the original behaviour). Passing them explicitly makes the result
        independent of which label cell was executed last.

    Returns
    -------
    float
        ``mean(sim(word, high_labels)) - mean(sim(word, low_labels))``.

    Raises
    ------
    ValueError
        If one of the label sequences is empty.

    Notes
    -----
    Whatever ``model.get_vector`` raises for an unknown word or label is
    propagated unchanged.
    """
    if high_labels is None:
        high_labels = lst_high_conflict
    if low_labels is None:
        low_labels = lst_low_conflict
    if len(high_labels) == 0 or len(low_labels) == 0:
        raise ValueError("high_labels and low_labels must not be empty")

    # Look the word up once instead of once per label.
    word_vector = model.get_vector(word)

    def mean_similarity(labels):
        # One batched call: row 0 holds the similarity of the word to each label.
        label_vectors = [model.get_vector(label) for label in labels]
        similarities = cosine_similarity([word_vector], label_vectors)[0]
        return sum(similarities) / len(labels)

    return mean_similarity(high_labels) - mean_similarity(low_labels)

In [69]:
# ROMANTIK

# Determine the conflict value of every word type in the lemmatized corpus,
# scored by conf_value() against the label lists currently bound to
# lst_high_conflict / lst_low_conflict.

dict_conflict = {}  # word type -> conflict value
lst_conflict = []   # the same values, in iteration order

for word in set(lst_words_Romantik_core_LEMMATIZED):
    try:
        conflict_value = conf_value(word, model_Romantik)
    except KeyError:
        # No vector for this word: leave it unscored. KeyError is what gensim's
        # KeyedVectors.get_vector raises for unknown keys (assumes the model is
        # a KeyedVectors -- confirm). Any other error now surfaces instead of
        # silently producing an empty dictionary.
        continue
    lst_conflict.append(conflict_value)
    dict_conflict[word] = conflict_value

In [70]:
# Average the word-level conflict values over every lemmatized phrase and store
# the result in 'mean_conf_dornseiff'.
# Assumes the frame's row labels are 0..n-1 in phrase-list order -- TODO confirm.
for i, phrase in enumerate(lst_phrases_Romantik_core_LEMMATIZED):

    # Words that received no entry in dict_conflict are left out of the mean
    # instead of aborting the whole loop with a KeyError.
    lst_conflict_phrase = [dict_conflict[word] for word in phrase if word in dict_conflict]

    if lst_conflict_phrase:
        mean_conf_phrase = sum(lst_conflict_phrase) / len(lst_conflict_phrase)
    else:
        # empty phrase or no scored word: the mean is undefined
        mean_conf_phrase = float('nan')

    df_VPs_Romantik_core.at[i, 'mean_conf_dornseiff'] = mean_conf_phrase

In [71]:
# Preview the first rows of the Romantik frame (head() defaults to 5 rows)
# to check the freshly written 'mean_conf_dornseiff' column.
df_VPs_Romantik_core.head()

Unnamed: 0,phrase_tokenized,phrase_lemmatized,mean_val_adj,mean_aro_adj,mean_ep_adj,mean_val_noun,mean_aro_noun,mean_ep_noun,mean_conf_dornseiff,mean_conf_annotation,novel_title,novel_beg,novel_end,phrase_pos
0,"[dem, schutzgeist, bleibt, ein, treuer, sinn, ...","[der, schutzgeist, bleiben, einen, treu, sinn,...",-0.008388,0.007577,-0.000145,0.007036,0.224101,0.006323,0.004102,,"Arnim_Achim_von_Armut,_Reichtum,_Schuld_und_Bu...",0,17602,0
1,"[so, ward, auch, mir, ein, hochgesellig, leben...","[so, ward, auch, sich, einen, hochgesellig, le...",0.000133,-0.000132,-0.000119,0.007223,0.257948,0.005676,-0.007146,,"Arnim_Achim_von_Armut,_Reichtum,_Schuld_und_Bu...",0,17602,1
2,"[wo, sich, die, worte, leicht, zum, lied, gere...","[wo, sich, der, wort, leicht, zum, lied, reihen]",-0.000316,-0.002302,-4.8e-05,0.009215,0.211023,0.005506,-0.01941,,"Arnim_Achim_von_Armut,_Reichtum,_Schuld_und_Bu...",0,17602,2
3,"[mein, lied, und, ich, wir, bleiben, treu, erg...","[mein, lied, und, ich, ich, bleiben, treu, erg...",-0.002559,-0.011589,-0.000559,0.013513,0.267624,0.006237,0.006048,,"Arnim_Achim_von_Armut,_Reichtum,_Schuld_und_Bu...",0,17602,3
4,"[der, uns, hat, durch, melodie, geweiht]","[der, sich, haben, durch, melodie, weihen]",0.021854,0.004098,-0.000289,-0.004865,0.249832,0.006091,-0.008013,,"Arnim_Achim_von_Armut,_Reichtum,_Schuld_und_Bu...",0,17602,4


In [72]:
# REALISMUS

# Determine the conflict value of every word type in the lemmatized corpus,
# scored by conf_value() against the label lists currently bound to
# lst_high_conflict / lst_low_conflict.

dict_conflict = {}  # word type -> conflict value
lst_conflict = []   # the same values, in iteration order

for word in set(lst_words_Realismus_LEMMATIZED):
    try:
        conflict_value = conf_value(word, model_RealismusNaturalismus)
    except KeyError:
        # No vector for this word: leave it unscored. KeyError is what gensim's
        # KeyedVectors.get_vector raises for unknown keys (assumes the model is
        # a KeyedVectors -- confirm). Any other error now surfaces instead of
        # silently producing an empty dictionary.
        continue
    lst_conflict.append(conflict_value)
    dict_conflict[word] = conflict_value

In [73]:
# Average the word-level conflict values over every lemmatized phrase and store
# the result in 'mean_conf_dornseiff'.
# Assumes the frame's row labels are 0..n-1 in phrase-list order -- TODO confirm.
for i, phrase in enumerate(lst_phrases_Realismus_LEMMATIZED):

    # Words that received no entry in dict_conflict are left out of the mean
    # instead of aborting the whole loop with a KeyError.
    lst_conflict_phrase = [dict_conflict[word] for word in phrase if word in dict_conflict]

    if lst_conflict_phrase:
        mean_conf_phrase = sum(lst_conflict_phrase) / len(lst_conflict_phrase)
    else:
        # empty phrase or no scored word: the mean is undefined
        mean_conf_phrase = float('nan')

    df_VPs_Realismus.at[i, 'mean_conf_dornseiff'] = mean_conf_phrase

In [74]:
# Preview the first rows of the Realismus frame (head() defaults to 5 rows)
# to check the freshly written 'mean_conf_dornseiff' column.
df_VPs_Realismus.head()

Unnamed: 0,phrase_tokenized,phrase_lemmatized,mean_val_adj,mean_aro_adj,mean_ep_adj,mean_val_noun,mean_aro_noun,mean_ep_noun,mean_conf_dornseiff,mean_conf_annotation,novel_title,novel_beg,novel_end,phrase_pos
0,"[es, klopfte, hart, und, kurz, an, die, tür]","[ich, klopfen, hart, und, kurz, an, der, tür]",-0.051287,-0.029731,-0.001594,-0.014026,0.239272,0.00693,0.01286,,Boy-Ed_Ida_Empor,0,6496,0
1,"[obgleich, irene, seit, vielen, stunden, bald,...","[obgleich, irene, seit, viel, stunde, bald, wa...",-0.023664,-0.023765,-0.000829,-0.028803,0.25432,0.008266,0.010286,,Boy-Ed_Ida_Empor,0,6496,1
2,"[fuhr, sie, nun, doch, erschreckt, zusammen]","[fahren, ich, nun, doch, erschrecken, zusammen]",-0.043396,-0.008009,0.001012,-0.033831,0.252944,0.009461,0.030593,,Boy-Ed_Ida_Empor,0,6496,2
3,"[die, ganze, nacht, hatte, sie, keinen, rechte...","[der, ganze, nacht, haben, ich, kein, recht, s...",-0.027959,-0.033338,-0.001239,-0.035174,0.273068,0.011571,0.012095,,Boy-Ed_Ida_Empor,0,6496,3
4,"[und, nun, schien, es, ihr, als, wären, ihre, ...","[und, nun, scheinen, ich, mein, als, sein, mei...",-0.037474,-0.032536,-0.001683,-0.044963,0.29948,0.015282,0.014476,,Boy-Ed_Ida_Empor,0,6496,4


In [75]:
# NATURALISMUS

# Determine the conflict value of every word type in the lemmatized corpus,
# scored by conf_value() against the label lists currently bound to
# lst_high_conflict / lst_low_conflict.

dict_conflict = {}  # word type -> conflict value
lst_conflict = []   # the same values, in iteration order

for word in set(lst_words_Naturalismus_LEMMATIZED):
    try:
        conflict_value = conf_value(word, model_RealismusNaturalismus)
    except KeyError:
        # No vector for this word: leave it unscored. KeyError is what gensim's
        # KeyedVectors.get_vector raises for unknown keys (assumes the model is
        # a KeyedVectors -- confirm). Any other error now surfaces instead of
        # silently producing an empty dictionary.
        continue
    lst_conflict.append(conflict_value)
    dict_conflict[word] = conflict_value

In [76]:
# Average the word-level conflict values over every lemmatized phrase and store
# the result in 'mean_conf_dornseiff'.
# Assumes the frame's row labels are 0..n-1 in phrase-list order -- TODO confirm.
for i, phrase in enumerate(lst_phrases_Naturalismus_LEMMATIZED):

    # Words that received no entry in dict_conflict are left out of the mean
    # instead of aborting the whole loop with a KeyError.
    lst_conflict_phrase = [dict_conflict[word] for word in phrase if word in dict_conflict]

    if lst_conflict_phrase:
        mean_conf_phrase = sum(lst_conflict_phrase) / len(lst_conflict_phrase)
    else:
        # empty phrase or no scored word: the mean is undefined
        mean_conf_phrase = float('nan')

    df_VPs_Naturalismus.at[i, 'mean_conf_dornseiff'] = mean_conf_phrase

In [77]:
# Preview the first rows of the Naturalismus frame (head() defaults to 5 rows)
# to check the freshly written 'mean_conf_dornseiff' column.
df_VPs_Naturalismus.head()

Unnamed: 0,phrase_tokenized,phrase_lemmatized,mean_val_adj,mean_aro_adj,mean_ep_adj,mean_val_noun,mean_aro_noun,mean_ep_noun,mean_conf_dornseiff,mean_conf_annotation,novel_title,novel_beg,novel_end,phrase_pos
0,"[in, der, morgenstille, war, nichts, vernehmba...","[in, der, morgenstille, sein, nichts, vernehmb...",-0.041573,-0.009479,-0.000255,-0.018016,0.236784,0.005706,-0.001981,,Andreas-Salome_Lou_Ruth,0,8249,0
1,"[die, sich, nicht, weit, von, der, russischen,...","[der, sich, nicht, weit, von, der, russisch, h...",-0.017482,-0.012251,-0.000511,-0.002285,0.230448,0.0063,0.004817,,Andreas-Salome_Lou_Ruth,0,8249,1
2,"[die, breite, ungepflasterte, straße, die, sic...","[der, breit, ungepflasterte, straße, der, sich...",-0.023813,-0.023564,-0.00089,-0.005926,0.223625,0.005873,0.002291,,Andreas-Salome_Lou_Ruth,0,8249,2
3,"[dann, holperte, ein, leiterwagen, mit, einige...","[dann, holpern, einen, leiterwagen, mit, einig...",-0.05482,-0.040984,-0.00252,-0.011334,0.213727,0.004154,0.033816,,Andreas-Salome_Lou_Ruth,0,8249,3
4,"[der, fuhrmann, kletterte, von, seinem, sitz, ...","[der, fuhrmann, klettern, von, mein, sitz, wer...",-0.04453,-0.022021,-0.001098,-0.015741,0.23927,0.005927,0.006729,,Andreas-Salome_Lou_Ruth,0,8249,4


#### Approach 3 (Annotation)

In [78]:
# Label words for Approach 3 (Annotation).
# These assignments replace the lists bound to the same two names for
# Approach 2; conf_value() reads whichever pair is currently bound.

# pole with the high-conflict labels
lst_high_conflict = [
    'bedenklich', 'ängstlich', 'grausam', 'gewehr', 'ballen',
    'stechen', 'messer', 'einschlagen', 'gehorchen', 'schelten',
]

# pole with the low-conflict labels
lst_low_conflict = [
    'trösten', 'reich', 'beruhigen', 'gewinnen',
    'rücken', 'bitten', 'leicht',
]


In [79]:
# ROMANTIK

# Determine the conflict value of every word type in the lemmatized corpus,
# scored by conf_value() against the label lists currently bound to
# lst_high_conflict / lst_low_conflict.

dict_conflict = {}  # word type -> conflict value
lst_conflict = []   # the same values, in iteration order

for word in set(lst_words_Romantik_core_LEMMATIZED):
    try:
        conflict_value = conf_value(word, model_Romantik)
    except KeyError:
        # No vector for this word: leave it unscored. KeyError is what gensim's
        # KeyedVectors.get_vector raises for unknown keys (assumes the model is
        # a KeyedVectors -- confirm). Any other error now surfaces instead of
        # silently producing an empty dictionary.
        continue
    lst_conflict.append(conflict_value)
    dict_conflict[word] = conflict_value

In [80]:
# Average the word-level conflict values over every lemmatized phrase and store
# the result in 'mean_conf_annotation'.
# Assumes the frame's row labels are 0..n-1 in phrase-list order -- TODO confirm.
for i, phrase in enumerate(lst_phrases_Romantik_core_LEMMATIZED):

    # Words that received no entry in dict_conflict are left out of the mean
    # instead of aborting the whole loop with a KeyError.
    lst_conflict_phrase = [dict_conflict[word] for word in phrase if word in dict_conflict]

    if lst_conflict_phrase:
        mean_conf_phrase = sum(lst_conflict_phrase) / len(lst_conflict_phrase)
    else:
        # empty phrase or no scored word: the mean is undefined
        mean_conf_phrase = float('nan')

    df_VPs_Romantik_core.at[i, 'mean_conf_annotation'] = mean_conf_phrase

In [81]:
# Preview the first rows of the Romantik frame (head() defaults to 5 rows)
# to check the freshly written 'mean_conf_annotation' column.
df_VPs_Romantik_core.head()

Unnamed: 0,phrase_tokenized,phrase_lemmatized,mean_val_adj,mean_aro_adj,mean_ep_adj,mean_val_noun,mean_aro_noun,mean_ep_noun,mean_conf_dornseiff,mean_conf_annotation,novel_title,novel_beg,novel_end,phrase_pos
0,"[dem, schutzgeist, bleibt, ein, treuer, sinn, ...","[der, schutzgeist, bleiben, einen, treu, sinn,...",-0.008388,0.007577,-0.000145,0.007036,0.224101,0.006323,0.004102,-0.016558,"Arnim_Achim_von_Armut,_Reichtum,_Schuld_und_Bu...",0,17602,0
1,"[so, ward, auch, mir, ein, hochgesellig, leben...","[so, ward, auch, sich, einen, hochgesellig, le...",0.000133,-0.000132,-0.000119,0.007223,0.257948,0.005676,-0.007146,-0.024458,"Arnim_Achim_von_Armut,_Reichtum,_Schuld_und_Bu...",0,17602,1
2,"[wo, sich, die, worte, leicht, zum, lied, gere...","[wo, sich, der, wort, leicht, zum, lied, reihen]",-0.000316,-0.002302,-4.8e-05,0.009215,0.211023,0.005506,-0.01941,-0.033575,"Arnim_Achim_von_Armut,_Reichtum,_Schuld_und_Bu...",0,17602,2
3,"[mein, lied, und, ich, wir, bleiben, treu, erg...","[mein, lied, und, ich, ich, bleiben, treu, erg...",-0.002559,-0.011589,-0.000559,0.013513,0.267624,0.006237,0.006048,-0.022191,"Arnim_Achim_von_Armut,_Reichtum,_Schuld_und_Bu...",0,17602,3
4,"[der, uns, hat, durch, melodie, geweiht]","[der, sich, haben, durch, melodie, weihen]",0.021854,0.004098,-0.000289,-0.004865,0.249832,0.006091,-0.008013,-0.029408,"Arnim_Achim_von_Armut,_Reichtum,_Schuld_und_Bu...",0,17602,4


In [82]:
# REALISMUS

# Determine the conflict value of every word type in the lemmatized corpus,
# scored by conf_value() against the label lists currently bound to
# lst_high_conflict / lst_low_conflict.

dict_conflict = {}  # word type -> conflict value
lst_conflict = []   # the same values, in iteration order

for word in set(lst_words_Realismus_LEMMATIZED):
    try:
        conflict_value = conf_value(word, model_RealismusNaturalismus)
    except KeyError:
        # No vector for this word: leave it unscored. KeyError is what gensim's
        # KeyedVectors.get_vector raises for unknown keys (assumes the model is
        # a KeyedVectors -- confirm). Any other error now surfaces instead of
        # silently producing an empty dictionary.
        continue
    lst_conflict.append(conflict_value)
    dict_conflict[word] = conflict_value

In [83]:
# Average the word-level conflict values over every lemmatized phrase and store
# the result in 'mean_conf_annotation'.
# Assumes the frame's row labels are 0..n-1 in phrase-list order -- TODO confirm.
for i, phrase in enumerate(lst_phrases_Realismus_LEMMATIZED):

    # Words that received no entry in dict_conflict are left out of the mean
    # instead of aborting the whole loop with a KeyError.
    lst_conflict_phrase = [dict_conflict[word] for word in phrase if word in dict_conflict]

    if lst_conflict_phrase:
        mean_conf_phrase = sum(lst_conflict_phrase) / len(lst_conflict_phrase)
    else:
        # empty phrase or no scored word: the mean is undefined
        mean_conf_phrase = float('nan')

    df_VPs_Realismus.at[i, 'mean_conf_annotation'] = mean_conf_phrase

In [84]:
# Preview the first rows of the Realismus frame (head() defaults to 5 rows)
# to check the freshly written 'mean_conf_annotation' column.
df_VPs_Realismus.head()

Unnamed: 0,phrase_tokenized,phrase_lemmatized,mean_val_adj,mean_aro_adj,mean_ep_adj,mean_val_noun,mean_aro_noun,mean_ep_noun,mean_conf_dornseiff,mean_conf_annotation,novel_title,novel_beg,novel_end,phrase_pos
0,"[es, klopfte, hart, und, kurz, an, die, tür]","[ich, klopfen, hart, und, kurz, an, der, tür]",-0.051287,-0.029731,-0.001594,-0.014026,0.239272,0.00693,0.01286,0.029283,Boy-Ed_Ida_Empor,0,6496,0
1,"[obgleich, irene, seit, vielen, stunden, bald,...","[obgleich, irene, seit, viel, stunde, bald, wa...",-0.023664,-0.023765,-0.000829,-0.028803,0.25432,0.008266,0.010286,0.008729,Boy-Ed_Ida_Empor,0,6496,1
2,"[fuhr, sie, nun, doch, erschreckt, zusammen]","[fahren, ich, nun, doch, erschrecken, zusammen]",-0.043396,-0.008009,0.001012,-0.033831,0.252944,0.009461,0.030593,0.004349,Boy-Ed_Ida_Empor,0,6496,2
3,"[die, ganze, nacht, hatte, sie, keinen, rechte...","[der, ganze, nacht, haben, ich, kein, recht, s...",-0.027959,-0.033338,-0.001239,-0.035174,0.273068,0.011571,0.012095,0.013931,Boy-Ed_Ida_Empor,0,6496,3
4,"[und, nun, schien, es, ihr, als, wären, ihre, ...","[und, nun, scheinen, ich, mein, als, sein, mei...",-0.037474,-0.032536,-0.001683,-0.044963,0.29948,0.015282,0.014476,0.013893,Boy-Ed_Ida_Empor,0,6496,4


In [85]:
# NATURALISMUS

# Determine the conflict value of every word type in the lemmatized corpus,
# scored by conf_value() against the label lists currently bound to
# lst_high_conflict / lst_low_conflict.

dict_conflict = {}  # word type -> conflict value
lst_conflict = []   # the same values, in iteration order

for word in set(lst_words_Naturalismus_LEMMATIZED):
    try:
        conflict_value = conf_value(word, model_RealismusNaturalismus)
    except KeyError:
        # No vector for this word: leave it unscored. KeyError is what gensim's
        # KeyedVectors.get_vector raises for unknown keys (assumes the model is
        # a KeyedVectors -- confirm). Any other error now surfaces instead of
        # silently producing an empty dictionary.
        continue
    lst_conflict.append(conflict_value)
    dict_conflict[word] = conflict_value

In [86]:
# Average the word-level conflict values over every lemmatized phrase and store
# the result in 'mean_conf_annotation'.
# Assumes the frame's row labels are 0..n-1 in phrase-list order -- TODO confirm.
for i, phrase in enumerate(lst_phrases_Naturalismus_LEMMATIZED):

    # Words that received no entry in dict_conflict are left out of the mean
    # instead of aborting the whole loop with a KeyError.
    lst_conflict_phrase = [dict_conflict[word] for word in phrase if word in dict_conflict]

    if lst_conflict_phrase:
        mean_conf_phrase = sum(lst_conflict_phrase) / len(lst_conflict_phrase)
    else:
        # empty phrase or no scored word: the mean is undefined
        mean_conf_phrase = float('nan')

    df_VPs_Naturalismus.at[i, 'mean_conf_annotation'] = mean_conf_phrase

In [87]:
# Preview the first rows of the Naturalismus frame (head() defaults to 5 rows)
# to check the freshly written 'mean_conf_annotation' column.
df_VPs_Naturalismus.head()

Unnamed: 0,phrase_tokenized,phrase_lemmatized,mean_val_adj,mean_aro_adj,mean_ep_adj,mean_val_noun,mean_aro_noun,mean_ep_noun,mean_conf_dornseiff,mean_conf_annotation,novel_title,novel_beg,novel_end,phrase_pos
0,"[in, der, morgenstille, war, nichts, vernehmba...","[in, der, morgenstille, sein, nichts, vernehmb...",-0.041573,-0.009479,-0.000255,-0.018016,0.236784,0.005706,-0.001981,0.021268,Andreas-Salome_Lou_Ruth,0,8249,0
1,"[die, sich, nicht, weit, von, der, russischen,...","[der, sich, nicht, weit, von, der, russisch, h...",-0.017482,-0.012251,-0.000511,-0.002285,0.230448,0.0063,0.004817,0.016264,Andreas-Salome_Lou_Ruth,0,8249,1
2,"[die, breite, ungepflasterte, straße, die, sic...","[der, breit, ungepflasterte, straße, der, sich...",-0.023813,-0.023564,-0.00089,-0.005926,0.223625,0.005873,0.002291,0.018371,Andreas-Salome_Lou_Ruth,0,8249,2
3,"[dann, holperte, ein, leiterwagen, mit, einige...","[dann, holpern, einen, leiterwagen, mit, einig...",-0.05482,-0.040984,-0.00252,-0.011334,0.213727,0.004154,0.033816,0.043843,Andreas-Salome_Lou_Ruth,0,8249,3
4,"[der, fuhrmann, kletterte, von, seinem, sitz, ...","[der, fuhrmann, klettern, von, mein, sitz, wer...",-0.04453,-0.022021,-0.001098,-0.015741,0.23927,0.005927,0.006729,0.032351,Andreas-Salome_Lou_Ruth,0,8249,4


#### Wrap-up

In [88]:
# save dataframes as csv
# The 'utf-8-sig' codec writes a UTF-8 byte order mark at the start of the file.
# NOTE(review): the doubled '//' separators look unintentional; they normally
# resolve like a single '/' -- confirm on the target platform.

df_VPs_Romantik_core.to_csv('..//Analyseergebnisse//csv//230420_df_VPs_Romantik_core.csv', encoding='utf-8-sig') 

In [89]:
# Write the Realismus frame as CSV; 'utf-8-sig' prefixes the file with a UTF-8 byte order mark.
df_VPs_Realismus.to_csv('..//Analyseergebnisse//csv//230420_df_VPs_Realismus.csv', encoding='utf-8-sig') 

In [90]:
# Write the Naturalismus frame as CSV; 'utf-8-sig' prefixes the file with a UTF-8 byte order mark.
df_VPs_Naturalismus.to_csv('..//Analyseergebnisse//csv//230420_df_VPs_Naturalismus.csv', encoding='utf-8-sig') 

In [91]:
# save dataframes as pkl
# Presumably kept in addition to the CSV so that the list-valued phrase columns
# can be reloaded as Python lists rather than as strings -- confirm with the
# notebooks that read these files.

df_VPs_Romantik_core.to_pickle('..//Analyseergebnisse//pickled//230420_df_VPs_Romantik_core.pkl')

In [92]:
# Pickle the Realismus frame next to the other pickled analysis results.
df_VPs_Realismus.to_pickle('..//Analyseergebnisse//pickled//230420_df_VPs_Realismus.pkl')

In [93]:
# Pickle the Naturalismus frame next to the other pickled analysis results.
df_VPs_Naturalismus.to_pickle('..//Analyseergebnisse//pickled//230420_df_VPs_Naturalismus.pkl')

Resources used:

https://github.com/jbrottrager/character-shifts-HPFFS/blob/main/scripts/10_emotionalProfiles.ipynb (last viewed: 20.04.2023)