# Character Shifts in Harry Potter Fanfics

# BAWL Cluster

### Last updated: 19.01.2022

## 0. Preliminaries

### 0.1 Import/Load

Import Modules:

In [1]:
import numpy as np
import pickle
import pandas as pd
from itertools import cycle
from gensim.models import KeyedVectors
from sklearn.preprocessing import normalize
from sklearn.decomposition import PCA
from sklearn.cluster import AffinityPropagation as AF



Load KeyedVectors for the Harry Potter books and fanfiction:

In [2]:
# Project folders; all three live under the same root directory.
path_project = r'Z:\Fanfiction\HP_Character-Distribution\pamphlet_character_shifts'
path_data = path_project + r'\data'
path_models = path_project + r'\results\vector_models'
path_pickled = path_project + r'\results\pickled'

In [3]:
# Word vectors stored for the original books.
books_vectors_path = path_models + '\\modelHPoriginalsD_vectors.kv'
model_books = KeyedVectors.load(books_vectors_path)

In [4]:
# Word vectors stored for the fanfiction corpus.
ff_vectors_path = path_models + '\\modelHPFFsD_vectors.kv'
model_ff = KeyedVectors.load(ff_vectors_path)

# 1. BAWL

Load BAWL and collect the words that occur at least 50 times in both the originals and the fanfiction

In [5]:
# Read the BAWL table (with corpus frequencies) and list the words that reach a
# frequency of 50 in both the originals and the fanfiction column.
bawl = pd.read_csv(path_data + '\\bawl\\BAWLR_with_freqs.csv')
frequent_in_both = (bawl["ORIGINALS_FREQ"] >= 50) & (bawl["FFS_FREQ"] >= 50)
bawl_words = bawl.loc[frequent_in_both, "WORD_LOWER"].tolist()

## 1.1 Originals

### 1.1.1 Valence

#### 1.1.1a High Valence

Define relevant valence words by threshold value

In [6]:
# Threshold-based selection: words with ORIGINALS_FREQ >= 50, EMO_MEAN at or above
# emo_mean and EMO_STD at or below emo_std.
emo_mean = 2
emo_std = 1

keep_rows = (
    (bawl["ORIGINALS_FREQ"] >= 50)
    & (bawl["EMO_MEAN"] >= emo_mean)
    & (bawl["EMO_STD"] <= emo_std)
)
high_valence_words = bawl.loc[keep_rows, "WORD_LOWER"].tolist()

Define relevant valence words by ranking

In [7]:
# Ranking-based selection: among words with ORIGINALS_FREQ >= 50 and EMO_STD at or below
# emo_std, keep the top_n words with the highest EMO_MEAN (ascending sort, take the tail).
top_n = 25
emo_std = 1

ranking_pool = bawl[(bawl["ORIGINALS_FREQ"] >= 50) & (bawl["EMO_STD"] <= emo_std)]
ranked_words = ranking_pool.sort_values("EMO_MEAN")["WORD_LOWER"]
high_valence_words = list(ranked_words)[-top_n:]

Get vectors for valence words

In [8]:
# Resolve every selected word to its index in the books model, then fetch its vector.
valence_words_index = [
    model_books.key_to_index[selected_word]
    for selected_word in high_valence_words
]
valence_word_vectors = [
    model_books.get_vector(vocab_index)
    for vocab_index in valence_words_index
]

Normalize vectors to unit length (L2 norm)

In [9]:
# Scale every word vector to unit L2 length before clustering.
unit_length_vectors = normalize(valence_word_vectors, norm='l2')
valence_word_cosine = np.array(unit_length_vectors)

Cluster with Affinity Propagation

In [10]:
# Cluster the normalised word vectors with Affinity Propagation (Euclidean affinity).
# random_state is pinned so that the clusters, and the word list pickled from them
# further down, come out the same on every Restart & Run All.
valence_cos_cluster = AF(affinity='euclidean', random_state=0)
valence_cos_cluster.fit(valence_word_cosine)



AffinityPropagation()

Transform cluster to dictionary

In [11]:
# Group the selected words by the cluster label assigned to them.
valence_sem_groups = {}
for position, label in enumerate(valence_cos_cluster.labels_):
    # Labels and words share the same order, so the position links the two.
    valence_sem_groups.setdefault(label, []).append(high_valence_words[position])

print(valence_sem_groups)

{0: ['lust', 'geschenk', 'spaß', 'wahrheit', 'sommer', 'ferien'], 3: ['begeistern', 'mutig', 'ehrlich', 'lieb', 'freuen', 'vertrauen', 'gefühl', 'leben', 'freund', 'lieben'], 1: ['strahlen', 'luft', 'warm', 'himmel', 'sonne'], 2: ['lächeln', 'glück', 'lachen', 'freude']}


Print relevant information of clusters

In [12]:
# For every cluster of high-valence words: print its members, the average pairwise
# similarity in the books model and the mean BAWL ratings, then record a
# "suggestion value" together with the member words for the ranking below.
suggestion_values = []

# Cluster labels a, b, c, ... taken from the full 26-letter alphabet.
letters = cycle('abcdefghijklmnopqrstuvwxyz')
candidate_words = np.array(high_valence_words)
for k, let in zip(range(len(valence_cos_cluster.cluster_centers_)), letters):
    class_members = valence_cos_cluster.labels_ == k
    cluster_words = candidate_words[class_members]

    print(let + ": ITEMS " + str(list(cluster_words)))
    # Similarity of every ordered pair of distinct members; each unordered pair is
    # counted twice, which leaves the average unchanged.
    dist_list = [
        model_books.similarity(member1, member2)
        for member1 in cluster_words
        for member2 in cluster_words
        if member1 != member2
    ]
    if dist_list:
        avg_similarity = sum(dist_list) / len(dist_list)
        print(let + ": AVG SIMILARITY " + str(avg_similarity))

    valence_means = []
    valence_stds = []
    arousal_means = []
    arousal_stds = []
    for word in valence_sem_groups[k]:
        word_row = bawl[bawl["WORD_LOWER"] == word]
        # .item() requires exactly one matching row and avoids the deprecated
        # float(<one-element Series>) conversion.
        valence_means.append(float(word_row["EMO_MEAN"].item()))
        valence_stds.append(float(word_row["EMO_STD"].item()))
        arousal_means.append(float(word_row["AROUSAL_MEAN"].item()))
        arousal_stds.append(float(word_row["AROUSAL_STD"].item()))

    mean_valence = sum(valence_means) / len(valence_means)
    mean_valence_std = sum(valence_stds) / len(valence_stds)
    mean_arousal = sum(arousal_means) / len(arousal_means)
    mean_arousal_std = sum(arousal_stds) / len(arousal_stds)
    print(let + ": MEAN VALENCE " + str(mean_valence))
    print(let + ": MEAN VALENCE STD " + str(mean_valence_std))
    print(let + ": MEAN AROUSAL " + str(mean_arousal))
    print(let + ": MEAN AROUSAL STD " + str(mean_arousal_std))

    if dist_list:
        # Similarity plus absolute mean valence minus mean valence spread, scaled by
        # 1.05 ** cluster size so that larger clusters score higher.
        suggestion_value = (avg_similarity + abs(mean_valence) - mean_valence_std) * (1.05 ** len(cluster_words))
    else:
        # A single-word cluster has no pairwise similarity; it is scored 0.
        suggestion_value = 0
    suggestion_values.append((suggestion_value, cluster_words))
    print(let + ": SUGGESTION VALUE " + str(suggestion_values[-1][0]))

a: ITEMS ['lust', 'geschenk', 'spaß', 'wahrheit', 'sommer', 'ferien']
a: AVG SIMILARITY 0.3094934562842051
a: MEAN VALENCE 2.3872549019607843
a: MEAN VALENCE STD 0.7148446459074046
a: MEAN AROUSAL 3.0387570232771473
a: MEAN AROUSAL STD 1.2357668381043003
a: SUGGESTION VALUE 2.6559405250421024
b: ITEMS ['strahlen', 'luft', 'warm', 'himmel', 'sonne']
b: AVG SIMILARITY 0.32349845841526986
b: MEAN VALENCE 2.29
b: MEAN VALENCE STD 0.7015038295493231
b: MEAN AROUSAL 2.296666666666667
b: MEAN AROUSAL STD 1.0229279794503074
b: SUGGESTION VALUE 2.440243492420639
c: ITEMS ['lächeln', 'glück', 'lachen', 'freude']
c: AVG SIMILARITY 0.24586826066176096
c: MEAN VALENCE 2.5647058823529414
c: MEAN VALENCE STD 0.6449365630774454
c: MEAN AROUSAL 2.883893557422969
c: MEAN AROUSAL STD 1.3073861853835012
c: SUGGESTION VALUE 2.6323460136486108
d: ITEMS ['begeistern', 'mutig', 'ehrlich', 'lieb', 'freuen', 'vertrauen', 'gefühl', 'leben', 'freund', 'lieben']
d: AVG SIMILARITY 0.26679692086246276
d: MEAN VALENC

Ranking for suggestion values

In [13]:
# Order the clusters from highest to lowest suggestion value and print them by rank.
suggestion_values = sorted(suggestion_values, key=lambda scored: scored[0], reverse=True)
for position, (score, ranked_cluster) in enumerate(suggestion_values, start=1):
    print(f"{position} {list(ranked_cluster)}")

1 ['begeistern', 'mutig', 'ehrlich', 'lieb', 'freuen', 'vertrauen', 'gefühl', 'leben', 'freund', 'lieben']
2 ['lust', 'geschenk', 'spaß', 'wahrheit', 'sommer', 'ferien']
3 ['lächeln', 'glück', 'lachen', 'freude']
4 ['strahlen', 'luft', 'warm', 'himmel', 'sonne']


In [14]:
# Persist the words of the top-ranked cluster.
top_cluster_words = suggestion_values[0][1]
high_val_originals = list(top_cluster_words)
pickle_target = path_pickled + "\\high_val_originals.pkl"
with open(pickle_target, "wb") as handle:
    pickle.dump(high_val_originals, handle)

#### 1.1.1b Low Valence

Define relevant valence words by threshold value

In [15]:
# Threshold-based selection: words with ORIGINALS_FREQ >= 50, EMO_MEAN at or below
# emo_mean and EMO_STD at or below emo_std.
emo_mean = -2
emo_std = 1

keep_rows = (
    (bawl["ORIGINALS_FREQ"] >= 50)
    & (bawl["EMO_MEAN"] <= emo_mean)
    & (bawl["EMO_STD"] <= emo_std)
)
low_valence_words = bawl.loc[keep_rows, "WORD_LOWER"].tolist()

Define relevant valence words by ranking

In [16]:
# Ranking-based selection: among words with ORIGINALS_FREQ >= 50 and EMO_STD at or below
# emo_std, keep the bot_n words with the lowest EMO_MEAN (ascending sort, take the head).
bot_n = 25
emo_std = 1

ranking_pool = bawl[(bawl["ORIGINALS_FREQ"] >= 50) & (bawl["EMO_STD"] <= emo_std)]
ranked_words = ranking_pool.sort_values("EMO_MEAN")["WORD_LOWER"]
low_valence_words = list(ranked_words)[:bot_n]

Get vectors for valence words

In [17]:
# Resolve every low-valence word to its index in the books model, then fetch its vector.
valence_words_index = [
    model_books.key_to_index[selected_word]
    for selected_word in low_valence_words
]
valence_word_vectors = [
    model_books.get_vector(vocab_index)
    for vocab_index in valence_words_index
]

Normalize vectors to unit length (L2 norm)

In [18]:
# Scale every word vector to unit L2 length before clustering.
unit_length_vectors = normalize(valence_word_vectors, norm='l2')
valence_word_cosine = np.array(unit_length_vectors)

Cluster with Affinity Propagation

In [19]:
# Cluster the normalised low-valence word vectors with Affinity Propagation.
# random_state is pinned so that the clusters, and the word list pickled from them
# further down, come out the same on every Restart & Run All.
valence_cos_cluster = AF(affinity='euclidean', random_state=0)
valence_cos_cluster.fit(valence_word_cosine)



AffinityPropagation()

Transform cluster to dictionary

In [20]:
# Group the low-valence words by the cluster label assigned to them.
valence_sem_groups = {}
for position, label in enumerate(valence_cos_cluster.labels_):
    # Labels and words share the same order, so the position links the two.
    valence_sem_groups.setdefault(label, []).append(low_valence_words[position])

print(valence_sem_groups)

{0: ['tod', 'töten', 'zerstören', 'stehlen', 'fluch', 'zwingen', 'verletzen', 'gefahr'], 4: ['angst', 'schlecht', 'ärger', 'schlimm', 'übel', 'problem'], 1: ['tot', 'leiche', 'grab', 'kerker', 'verboten', 'werwolf'], 2: ['traurig', 'entsetzt', 'schuld'], 3: ['schlagen', 'heulen']}


Print relevant information of clusters

In [21]:
# For every cluster of low-valence words: print its members, the average pairwise
# similarity in the books model and the mean BAWL ratings, then record a
# "suggestion value" together with the member words for the ranking below.
suggestion_values = []

# Cluster labels a, b, c, ... taken from the full 26-letter alphabet.
letters = cycle('abcdefghijklmnopqrstuvwxyz')
candidate_words = np.array(low_valence_words)
for k, let in zip(range(len(valence_cos_cluster.cluster_centers_)), letters):
    class_members = valence_cos_cluster.labels_ == k
    cluster_words = candidate_words[class_members]

    print(let + ": ITEMS " + str(list(cluster_words)))
    # Similarity of every ordered pair of distinct members; each unordered pair is
    # counted twice, which leaves the average unchanged.
    dist_list = [
        model_books.similarity(member1, member2)
        for member1 in cluster_words
        for member2 in cluster_words
        if member1 != member2
    ]
    if dist_list:
        avg_similarity = sum(dist_list) / len(dist_list)
        print(let + ": AVG SIMILARITY " + str(avg_similarity))

    valence_means = []
    valence_stds = []
    arousal_means = []
    arousal_stds = []
    for word in valence_sem_groups[k]:
        word_row = bawl[bawl["WORD_LOWER"] == word]
        # .item() requires exactly one matching row and avoids the deprecated
        # float(<one-element Series>) conversion.
        valence_means.append(float(word_row["EMO_MEAN"].item()))
        valence_stds.append(float(word_row["EMO_STD"].item()))
        arousal_means.append(float(word_row["AROUSAL_MEAN"].item()))
        arousal_stds.append(float(word_row["AROUSAL_STD"].item()))

    mean_valence = sum(valence_means) / len(valence_means)
    mean_valence_std = sum(valence_stds) / len(valence_stds)
    mean_arousal = sum(arousal_means) / len(arousal_means)
    mean_arousal_std = sum(arousal_stds) / len(arousal_stds)
    print(let + ": MEAN VALENCE " + str(mean_valence))
    print(let + ": MEAN VALENCE STD " + str(mean_valence_std))
    print(let + ": MEAN AROUSAL " + str(mean_arousal))
    print(let + ": MEAN AROUSAL STD " + str(mean_arousal_std))

    if dist_list:
        # Similarity plus absolute mean valence minus mean valence spread, scaled by
        # 1.05 ** cluster size. The size is taken from the low-valence cluster itself
        # rather than from the unrelated high-valence word list.
        suggestion_value = (avg_similarity + abs(mean_valence) - mean_valence_std) * (1.05 ** len(cluster_words))
    else:
        # A single-word cluster has no pairwise similarity; it is scored 0.
        suggestion_value = 0
    suggestion_values.append((suggestion_value, cluster_words))
    print(let + ": SUGGESTION VALUE " + str(suggestion_values[-1][0]))

a: ITEMS ['tod', 'töten', 'zerstören', 'stehlen', 'fluch', 'zwingen', 'verletzen', 'gefahr']
a: AVG SIMILARITY 0.2750306650996208
a: MEAN VALENCE -2.2705882352941176
a: MEAN VALENCE STD 0.7390226761105867
a: MEAN AROUSAL 3.990773809523809
a: MEAN AROUSAL STD 1.032955568604849
a: SUGGESTION VALUE 2.6691654262959097
b: ITEMS ['tot', 'leiche', 'grab', 'kerker', 'verboten', 'werwolf']
b: AVG SIMILARITY 0.2563492305576801
b: MEAN VALENCE -2.0916666666666663
b: MEAN VALENCE STD 0.7646569141161882
b: MEAN AROUSAL 3.903983815748522
b: MEAN AROUSAL STD 0.9854678577212209
b: SUGGESTION VALUE 2.121852470807677
c: ITEMS ['traurig', 'entsetzt', 'schuld']
c: AVG SIMILARITY 0.28608205914497375
c: MEAN VALENCE -1.866666666666667
c: MEAN VALENCE STD 0.6643486305132159
c: MEAN AROUSAL 3.320105820105821
c: MEAN AROUSAL STD 0.9038619394601151
c: SUGGESTION VALUE 1.7230091603198396
d: ITEMS ['schlagen', 'heulen']
d: AVG SIMILARITY 0.37247055768966675
d: MEAN VALENCE -1.8058823529411765
d: MEAN VALENCE STD 

Ranking for suggestion values

In [22]:
# Order the clusters from highest to lowest suggestion value and print them by rank.
suggestion_values = sorted(suggestion_values, key=lambda scored: scored[0], reverse=True)
for position, (score, ranked_cluster) in enumerate(suggestion_values, start=1):
    print(f"{position} {list(ranked_cluster)}")

1 ['tod', 'töten', 'zerstören', 'stehlen', 'fluch', 'zwingen', 'verletzen', 'gefahr']
2 ['tot', 'leiche', 'grab', 'kerker', 'verboten', 'werwolf']
3 ['angst', 'schlecht', 'ärger', 'schlimm', 'übel', 'problem']
4 ['traurig', 'entsetzt', 'schuld']
5 ['schlagen', 'heulen']


In [23]:
# Persist the words of the top-ranked cluster.
top_cluster_words = suggestion_values[0][1]
low_val_originals = list(top_cluster_words)
pickle_target = path_pickled + "\\low_val_originals.pkl"
with open(pickle_target, "wb") as handle:
    pickle.dump(low_val_originals, handle)

### 1.1.2 Arousal

#### 1.1.2a High Arousal

Define relevant arousal words by threshold value

In [24]:
# Threshold-based selection: words with ORIGINALS_FREQ >= 50, AROUSAL_MEAN at or above
# arousal_mean and AROUSAL_STD at or below arousal_std.
# NOTE(review): the low-arousal selection uses the same cut-off of 2 -- confirm that
# this is the intended threshold for "high" arousal.
arousal_mean = 2
arousal_std = 1

keep_rows = (
    (bawl["ORIGINALS_FREQ"] >= 50)
    & (bawl["AROUSAL_MEAN"] >= arousal_mean)
    & (bawl["AROUSAL_STD"] <= arousal_std)
)
high_arousal_words = bawl.loc[keep_rows, "WORD_LOWER"].tolist()

Define relevant arousal words by ranking

In [25]:
# Ranking-based selection: among words with ORIGINALS_FREQ >= 50 and AROUSAL_STD at or
# below arousal_std, keep the top_n words with the highest AROUSAL_MEAN.
top_n = 25
arousal_std = 1

ranking_pool = bawl[(bawl["ORIGINALS_FREQ"] >= 50) & (bawl["AROUSAL_STD"] <= arousal_std)]
ranked_words = ranking_pool.sort_values("AROUSAL_MEAN")["WORD_LOWER"]
high_arousal_words = list(ranked_words)[-top_n:]

Get vectors for arousal words

In [26]:
# Resolve every high-arousal word to its index in the books model, then fetch its vector.
arousal_words_index = [
    model_books.key_to_index[selected_word]
    for selected_word in high_arousal_words
]
arousal_word_vectors = [
    model_books.get_vector(vocab_index)
    for vocab_index in arousal_words_index
]

Normalize vectors to unit length (L2 norm)

In [27]:
# Scale every word vector to unit L2 length before clustering.
unit_length_vectors = normalize(arousal_word_vectors, norm='l2')
arousal_word_cosine = np.array(unit_length_vectors)

Cluster with Affinity Propagation

In [28]:
# Cluster the normalised high-arousal word vectors with Affinity Propagation.
# random_state is pinned so that the clusters, and the word list pickled from them
# further down, come out the same on every Restart & Run All.
arousal_cos_cluster = AF(affinity='euclidean', random_state=0)
arousal_cos_cluster.fit(arousal_word_cosine)



AffinityPropagation()

Transform cluster to dictionary

In [29]:
# Group the high-arousal words by the cluster label assigned to them.
arousal_sem_groups = {}
for position, label in enumerate(arousal_cos_cluster.labels_):
    # Labels and words share the same order, so the position links the two.
    arousal_sem_groups.setdefault(label, []).append(high_arousal_words[position])

print(arousal_sem_groups)

{1: ['verfolgen', 'leiden', 'gefahr'], 5: ['feuer', 'weinen', 'schlagen', 'zorn', 'schmerz', 'schreck', 'schrei', 'panik'], 2: ['schlecht', 'ärger', 'angst'], 0: ['reizen', 'drängen', 'scharf'], 3: ['zwingen', 'verraten', 'warnen', 'leid'], 4: ['kampf', 'kerker', 'leiche', 'tot']}


Print relevant information of clusters

In [30]:
# For every cluster of high-arousal words: print its members, the average pairwise
# similarity in the books model and the mean BAWL ratings, then record a
# "suggestion value" together with the member words for the ranking below.
suggestion_values = []

# Cluster labels a, b, c, ... taken from the full 26-letter alphabet.
letters = cycle('abcdefghijklmnopqrstuvwxyz')
candidate_words = np.array(high_arousal_words)
for k, let in zip(range(len(arousal_cos_cluster.cluster_centers_)), letters):
    class_members = arousal_cos_cluster.labels_ == k
    cluster_words = candidate_words[class_members]

    print(let + ": ITEMS " + str(list(cluster_words)))
    # Similarity of every ordered pair of distinct members; each unordered pair is
    # counted twice, which leaves the average unchanged.
    dist_list = [
        model_books.similarity(member1, member2)
        for member1 in cluster_words
        for member2 in cluster_words
        if member1 != member2
    ]
    if dist_list:
        avg_similarity = sum(dist_list) / len(dist_list)
        print(let + ": AVG SIMILARITY " + str(avg_similarity))

    valence_means = []
    valence_stds = []
    arousal_means = []
    arousal_stds = []
    for word in arousal_sem_groups[k]:
        word_row = bawl[bawl["WORD_LOWER"] == word]
        # .item() requires exactly one matching row and avoids the deprecated
        # float(<one-element Series>) conversion.
        valence_means.append(float(word_row["EMO_MEAN"].item()))
        valence_stds.append(float(word_row["EMO_STD"].item()))
        arousal_means.append(float(word_row["AROUSAL_MEAN"].item()))
        arousal_stds.append(float(word_row["AROUSAL_STD"].item()))

    mean_valence = sum(valence_means) / len(valence_means)
    mean_valence_std = sum(valence_stds) / len(valence_stds)
    mean_arousal = sum(arousal_means) / len(arousal_means)
    mean_arousal_std = sum(arousal_stds) / len(arousal_stds)
    print(let + ": MEAN VALENCE " + str(mean_valence))
    print(let + ": MEAN VALENCE STD " + str(mean_valence_std))
    print(let + ": MEAN AROUSAL " + str(mean_arousal))
    print(let + ": MEAN AROUSAL STD " + str(mean_arousal_std))

    if dist_list:
        # Similarity plus mean arousal minus mean arousal spread, scaled by
        # 1.05 ** cluster size so that larger clusters score higher.
        suggestion_value = (avg_similarity + mean_arousal - mean_arousal_std) * (1.05 ** len(cluster_words))
    else:
        # A single-word cluster has no pairwise similarity; it is scored 0.
        suggestion_value = 0
    suggestion_values.append((suggestion_value, cluster_words))
    print(let + ": SUGGESTION VALUE " + str(suggestion_values[-1][0]))

a: ITEMS ['reizen', 'drängen', 'scharf']
a: AVG SIMILARITY 0.21568880478541055
a: MEAN VALENCE -0.5745098039215687
a: MEAN VALENCE STD 1.1022207662885075
a: MEAN AROUSAL 3.9461988304093567
a: MEAN AROUSAL STD 0.8524465232931827
a: SUGGESTION VALUE 3.831091767165072
b: ITEMS ['verfolgen', 'leiden', 'gefahr']
b: AVG SIMILARITY 0.2641720771789551
b: MEAN VALENCE -1.8254901960784313
b: MEAN VALENCE STD 1.0265575454742872
b: MEAN AROUSAL 3.9714285714285715
b: MEAN AROUSAL STD 0.7901114534485343
b: SUGGESTION VALUE 3.988584429545929
c: ITEMS ['schlecht', 'ärger', 'angst']
c: AVG SIMILARITY 0.3494122177362442
c: MEAN VALENCE -2.266666666666666
c: MEAN VALENCE STD 0.6464019662600337
c: MEAN AROUSAL 4.034391534391535
c: MEAN AROUSAL STD 0.8017971118115712
c: SUGGESTION VALUE 4.14662043699605
d: ITEMS ['zwingen', 'verraten', 'warnen', 'leid']
d: AVG SIMILARITY 0.32215701043605804
d: MEAN VALENCE -1.4838235294117648
d: MEAN VALENCE STD 1.1907052659492896
d: MEAN AROUSAL 3.9722222222222223
d: MEAN

Ranking for suggestion values

In [31]:
# Order the clusters from highest to lowest suggestion value and print them by rank.
suggestion_values = sorted(suggestion_values, key=lambda scored: scored[0], reverse=True)
for position, (score, ranked_cluster) in enumerate(suggestion_values, start=1):
    print(f"{position} {list(ranked_cluster)}")

1 ['feuer', 'weinen', 'schlagen', 'zorn', 'schmerz', 'schreck', 'schrei', 'panik']
2 ['zwingen', 'verraten', 'warnen', 'leid']
3 ['kampf', 'kerker', 'leiche', 'tot']
4 ['schlecht', 'ärger', 'angst']
5 ['verfolgen', 'leiden', 'gefahr']
6 ['reizen', 'drängen', 'scharf']


In [32]:
# Persist the words of the top-ranked cluster.
top_cluster_words = suggestion_values[0][1]
high_arousal_originals = list(top_cluster_words)
pickle_target = path_pickled + "\\high_arousal_originals.pkl"
with open(pickle_target, "wb") as handle:
    pickle.dump(high_arousal_originals, handle)

#### 1.1.2b Low Arousal

Define relevant arousal words by threshold value

In [33]:
# Threshold-based selection: words with ORIGINALS_FREQ >= 50, AROUSAL_MEAN at or below
# arousal_mean and AROUSAL_STD at or below arousal_std.
arousal_mean = 2
arousal_std = 1

keep_rows = (
    (bawl["ORIGINALS_FREQ"] >= 50)
    & (bawl["AROUSAL_MEAN"] <= arousal_mean)
    & (bawl["AROUSAL_STD"] <= arousal_std)
)
low_arousal_words = bawl.loc[keep_rows, "WORD_LOWER"].tolist()

Define relevant arousal words by ranking

In [34]:
# Ranking-based selection: among words with ORIGINALS_FREQ >= 50 and AROUSAL_STD at or
# below arousal_std, keep the bot_n words with the lowest AROUSAL_MEAN.
bot_n = 25
arousal_std = 1

ranking_pool = bawl[(bawl["ORIGINALS_FREQ"] >= 50) & (bawl["AROUSAL_STD"] <= arousal_std)]
ranked_words = ranking_pool.sort_values("AROUSAL_MEAN")["WORD_LOWER"]
low_arousal_words = list(ranked_words)[:bot_n]

Get vectors for arousal words

In [35]:
# Resolve every low-arousal word to its index in the books model, then fetch its vector.
arousal_words_index = [
    model_books.key_to_index[selected_word]
    for selected_word in low_arousal_words
]
arousal_word_vectors = [
    model_books.get_vector(vocab_index)
    for vocab_index in arousal_words_index
]

Normalize vectors to unit length (L2 norm)

In [36]:
# Scale every word vector to unit L2 length before clustering.
unit_length_vectors = normalize(arousal_word_vectors, norm='l2')
arousal_word_cosine = np.array(unit_length_vectors)

Cluster with Affinity Propagation

In [37]:
# Cluster the normalised low-arousal word vectors with Affinity Propagation.
# random_state is pinned so that the clusters, and the word list pickled from them
# further down, come out the same on every Restart & Run All.
arousal_cos_cluster = AF(affinity='euclidean', random_state=0)
arousal_cos_cluster.fit(arousal_word_cosine)



AffinityPropagation()

Transform cluster to dictionary

In [38]:
# Group the low-arousal words by the cluster label assigned to them.
arousal_sem_groups = {}
for position, label in enumerate(arousal_cos_cluster.labels_):
    # Labels and words share the same order, so the position links the two.
    arousal_sem_groups.setdefault(label, []).append(low_arousal_words[position])

print(arousal_sem_groups)

{0: ['schlaf', 'still', 'pause', 'wenig', 'schweigen'], 1: ['schale', 'kamin', 'baum', 'stumm', 'boden', 'ding', 'gras', 'teller', 'kissen', 'feder', 'becher', 'glas', 'tisch', 'hand', 'sessel'], 2: ['decke', 'himmel', 'zimmer'], 3: ['bleiben'], 4: ['reichen']}


Print relevant information of clusters

In [39]:
# For every cluster of low-arousal words: print its members, the average pairwise
# similarity in the books model and the mean BAWL ratings, then record a
# "suggestion value" together with the member words for the ranking below.
suggestion_values = []

# Cluster labels a, b, c, ... taken from the full 26-letter alphabet.
letters = cycle('abcdefghijklmnopqrstuvwxyz')
candidate_words = np.array(low_arousal_words)
for k, let in zip(range(len(arousal_cos_cluster.cluster_centers_)), letters):
    class_members = arousal_cos_cluster.labels_ == k
    cluster_words = candidate_words[class_members]

    print(let + ": ITEMS " + str(list(cluster_words)))
    # Similarity of every ordered pair of distinct members; each unordered pair is
    # counted twice, which leaves the average unchanged.
    dist_list = [
        model_books.similarity(member1, member2)
        for member1 in cluster_words
        for member2 in cluster_words
        if member1 != member2
    ]
    if dist_list:
        avg_similarity = sum(dist_list) / len(dist_list)
        print(let + ": AVG SIMILARITY " + str(avg_similarity))

    valence_means = []
    valence_stds = []
    arousal_means = []
    arousal_stds = []
    for word in arousal_sem_groups[k]:
        word_row = bawl[bawl["WORD_LOWER"] == word]
        # .item() requires exactly one matching row and avoids the deprecated
        # float(<one-element Series>) conversion.
        valence_means.append(float(word_row["EMO_MEAN"].item()))
        valence_stds.append(float(word_row["EMO_STD"].item()))
        arousal_means.append(float(word_row["AROUSAL_MEAN"].item()))
        arousal_stds.append(float(word_row["AROUSAL_STD"].item()))

    mean_valence = sum(valence_means) / len(valence_means)
    mean_valence_std = sum(valence_stds) / len(valence_stds)
    mean_arousal = sum(arousal_means) / len(arousal_means)
    mean_arousal_std = sum(arousal_stds) / len(arousal_stds)
    print(let + ": MEAN VALENCE " + str(mean_valence))
    print(let + ": MEAN VALENCE STD " + str(mean_valence_std))
    print(let + ": MEAN AROUSAL " + str(mean_arousal))
    print(let + ": MEAN AROUSAL STD " + str(mean_arousal_std))

    if dist_list:
        # (4 - mean arousal) makes lower arousal score higher; the spread is subtracted
        # and the result is scaled by 1.05 ** cluster size.
        # NOTE(review): the constant 4 is presumably tied to the arousal rating scale -- confirm.
        suggestion_value = (avg_similarity + (4 - mean_arousal) - mean_arousal_std) * (1.05 ** len(cluster_words))
    else:
        # A single-word cluster has no pairwise similarity; it is scored 0.
        suggestion_value = 0
    suggestion_values.append((suggestion_value, cluster_words))
    print(let + ": SUGGESTION VALUE " + str(suggestion_values[-1][0]))

a: ITEMS ['schlaf', 'still', 'pause', 'wenig', 'schweigen']
a: AVG SIMILARITY 0.29559065103530885
a: MEAN VALENCE 0.4970588235294118
a: MEAN VALENCE STD 1.1616949083788004
a: MEAN AROUSAL 1.4018045112781956
a: MEAN AROUSAL STD 0.6254920175095593
a: SUGGESTION VALUE 2.8949819665516756
b: ITEMS ['schale', 'kamin', 'baum', 'stumm', 'boden', 'ding', 'gras', 'teller', 'kissen', 'feder', 'becher', 'glas', 'tisch', 'hand', 'sessel']
b: AVG SIMILARITY 0.3271927245670841
b: MEAN VALENCE 0.7684313725490195
b: MEAN VALENCE STD 0.9616918156166732
b: MEAN AROUSAL 1.6276451914098973
b: MEAN AROUSAL STD 0.7980606789191464
b: SUGGESTION VALUE 3.953054604055741
c: ITEMS ['decke', 'himmel', 'zimmer']
c: AVG SIMILARITY 0.34883734583854675
c: MEAN VALENCE 1.5533333333333335
c: MEAN VALENCE STD 1.07971627
c: MEAN AROUSAL 1.6355555555555557
c: MEAN AROUSAL STD 0.8683506410051516
c: SUGGESTION VALUE 2.1357384216827593
d: ITEMS ['bleiben']
d: MEAN VALENCE 0.8235294117647058
d: MEAN VALENCE STD 0.7966059911708

Ranking for suggestion values

In [40]:
# Order the clusters from highest to lowest suggestion value and print them by rank.
suggestion_values = sorted(suggestion_values, key=lambda scored: scored[0], reverse=True)
for position, (score, ranked_cluster) in enumerate(suggestion_values, start=1):
    print(f"{position} {list(ranked_cluster)}")

1 ['schale', 'kamin', 'baum', 'stumm', 'boden', 'ding', 'gras', 'teller', 'kissen', 'feder', 'becher', 'glas', 'tisch', 'hand', 'sessel']
2 ['schlaf', 'still', 'pause', 'wenig', 'schweigen']
3 ['decke', 'himmel', 'zimmer']
4 ['bleiben']
5 ['reichen']


In [41]:
# Persist the words of the top-ranked cluster.
top_cluster_words = suggestion_values[0][1]
low_arousal_originals = list(top_cluster_words)
pickle_target = path_pickled + "\\low_arousal_originals.pkl"
with open(pickle_target, "wb") as handle:
    pickle.dump(low_arousal_originals, handle)

## 1.2 Fanfiction

### 1.2.1 Valence

#### 1.2.1a High Valence

Define relevant valence words by threshold value

In [42]:
# Threshold-based selection: words with FFS_FREQ >= 50, EMO_MEAN at or above
# emo_mean and EMO_STD at or below emo_std.
emo_mean = 2
emo_std = 1

keep_rows = (
    (bawl["FFS_FREQ"] >= 50)
    & (bawl["EMO_MEAN"] >= emo_mean)
    & (bawl["EMO_STD"] <= emo_std)
)
high_valence_words = bawl.loc[keep_rows, "WORD_LOWER"].tolist()

Define relevant valence words by ranking

In [43]:
# Ranking-based selection: among words with FFS_FREQ >= 50 and EMO_STD at or below
# emo_std, keep the top_n words with the highest EMO_MEAN (ascending sort, take the tail).
top_n = 25
emo_std = 1

ranking_pool = bawl[(bawl["FFS_FREQ"] >= 50) & (bawl["EMO_STD"] <= emo_std)]
ranked_words = ranking_pool.sort_values("EMO_MEAN")["WORD_LOWER"]
high_valence_words = list(ranked_words)[-top_n:]

Get vectors for valence words

In [44]:
# Resolve every high-valence word to its index in the fanfiction model, then fetch its vector.
valence_words_index = [
    model_ff.key_to_index[selected_word]
    for selected_word in high_valence_words
]
valence_word_vectors = [
    model_ff.get_vector(vocab_index)
    for vocab_index in valence_words_index
]

Normalize vectors to unit length (L2 norm)

In [45]:
# Scale every word vector to unit L2 length before clustering.
unit_length_vectors = normalize(valence_word_vectors, norm='l2')
valence_word_cosine = np.array(unit_length_vectors)

Cluster with Affinity Propagation

In [46]:
# Cluster the normalised high-valence fanfiction word vectors with Affinity Propagation.
# random_state is pinned so that the clusters, and the word list pickled from them
# further down, come out the same on every Restart & Run All.
valence_cos_cluster = AF(affinity='euclidean', random_state=0)
valence_cos_cluster.fit(valence_word_cosine)



AffinityPropagation()

Transform cluster to dictionary

In [47]:
# Group the high-valence words by the cluster label assigned to them.
valence_sem_groups = {}
for position, label in enumerate(valence_cos_cluster.labels_):
    # Labels and words share the same order, so the position links the two.
    valence_sem_groups.setdefault(label, []).append(high_valence_words[position])

print(valence_sem_groups)

{3: ['urlaub', 'zuhause', 'wahrheit', 'sommer', 'ferien'], 2: ['harmonie', 'frieden', 'freiheit'], 0: ['super', 'kreativ', 'prima', 'lieben'], 1: ['idylle', 'sonne', 'paradies'], 6: ['lebendig', 'heilung', 'gesund'], 5: ['erdbeere', 'freund', 'sex', 'küssen'], 4: ['glück', 'lachen', 'freude']}


Print relevant information of clusters

In [48]:
# For every cluster of high-valence words: print its members, the average pairwise
# similarity in the fanfiction model and the mean BAWL ratings, then record a
# "suggestion value" together with the member words for the ranking below.
suggestion_values = []

# Cluster labels a, b, c, ... taken from the full 26-letter alphabet.
letters = cycle('abcdefghijklmnopqrstuvwxyz')
candidate_words = np.array(high_valence_words)
for k, let in zip(range(len(valence_cos_cluster.cluster_centers_)), letters):
    class_members = valence_cos_cluster.labels_ == k
    cluster_words = candidate_words[class_members]

    print(let + ": ITEMS " + str(list(cluster_words)))
    # Similarity of every ordered pair of distinct members; each unordered pair is
    # counted twice, which leaves the average unchanged.
    dist_list = [
        model_ff.similarity(member1, member2)
        for member1 in cluster_words
        for member2 in cluster_words
        if member1 != member2
    ]
    if dist_list:
        avg_similarity = sum(dist_list) / len(dist_list)
        print(let + ": AVG SIMILARITY " + str(avg_similarity))

    valence_means = []
    valence_stds = []
    arousal_means = []
    arousal_stds = []
    for word in valence_sem_groups[k]:
        word_row = bawl[bawl["WORD_LOWER"] == word]
        # .item() requires exactly one matching row and avoids the deprecated
        # float(<one-element Series>) conversion.
        valence_means.append(float(word_row["EMO_MEAN"].item()))
        valence_stds.append(float(word_row["EMO_STD"].item()))
        arousal_means.append(float(word_row["AROUSAL_MEAN"].item()))
        arousal_stds.append(float(word_row["AROUSAL_STD"].item()))

    mean_valence = sum(valence_means) / len(valence_means)
    mean_valence_std = sum(valence_stds) / len(valence_stds)
    mean_arousal = sum(arousal_means) / len(arousal_means)
    mean_arousal_std = sum(arousal_stds) / len(arousal_stds)
    print(let + ": MEAN VALENCE " + str(mean_valence))
    print(let + ": MEAN VALENCE STD " + str(mean_valence_std))
    print(let + ": MEAN AROUSAL " + str(mean_arousal))
    print(let + ": MEAN AROUSAL STD " + str(mean_arousal_std))

    if dist_list:
        # Similarity plus absolute mean valence minus mean valence spread, scaled by
        # 1.05 ** cluster size so that larger clusters score higher.
        suggestion_value = (avg_similarity + abs(mean_valence) - mean_valence_std) * (1.05 ** len(cluster_words))
    else:
        # A single-word cluster has no pairwise similarity; it is scored 0.
        suggestion_value = 0
    suggestion_values.append((suggestion_value, cluster_words))
    print(let + ": SUGGESTION VALUE " + str(suggestion_values[-1][0]))

a: ITEMS ['super', 'kreativ', 'prima', 'lieben']
a: AVG SIMILARITY 0.2902863969405492
a: MEAN VALENCE 2.626470588235294
a: MEAN VALENCE STD 0.558484207998896
a: MEAN AROUSAL 3.135014619883041
a: MEAN AROUSAL STD 1.2333064966616738
a: SUGGESTION VALUE 2.8664952998634377
b: ITEMS ['idylle', 'sonne', 'paradies']
b: AVG SIMILARITY 0.2452204575141271
b: MEAN VALENCE 2.6333333333333333
b: MEAN VALENCE STD 0.5756622808474438
b: MEAN AROUSAL 2.335511982570806
b: MEAN AROUSAL STD 1.3021731903900922
b: SUGGESTION VALUE 2.6658847842637696
c: ITEMS ['harmonie', 'frieden', 'freiheit']
c: AVG SIMILARITY 0.3926520049571991
c: MEAN VALENCE 2.6098039215686275
c: MEAN VALENCE STD 0.6117400275613434
c: MEAN AROUSAL 2.4586056644880174
c: MEAN AROUSAL STD 1.0706733238363053
c: SUGGESTION VALUE 2.7675524925387602
d: ITEMS ['urlaub', 'zuhause', 'wahrheit', 'sommer', 'ferien']
d: AVG SIMILARITY 0.35850543677806856
d: MEAN VALENCE 2.51
d: MEAN VALENCE STD 0.6544012272559334
d: MEAN AROUSAL 2.3922385620915034
d

Ranking for suggestion values

In [49]:
# Order the clusters from highest to lowest suggestion value and print them by rank.
suggestion_values = sorted(suggestion_values, key=lambda scored: scored[0], reverse=True)
for position, (score, ranked_cluster) in enumerate(suggestion_values, start=1):
    print(f"{position} {list(ranked_cluster)}")

1 ['super', 'kreativ', 'prima', 'lieben']
2 ['urlaub', 'zuhause', 'wahrheit', 'sommer', 'ferien']
3 ['harmonie', 'frieden', 'freiheit']
4 ['glück', 'lachen', 'freude']
5 ['idylle', 'sonne', 'paradies']
6 ['erdbeere', 'freund', 'sex', 'küssen']
7 ['lebendig', 'heilung', 'gesund']


In [50]:
# Persist the words of the top-ranked cluster.
top_cluster_words = suggestion_values[0][1]
high_val_ffs = list(top_cluster_words)
pickle_target = path_pickled + "\\high_val_ffs.pkl"
with open(pickle_target, "wb") as handle:
    pickle.dump(high_val_ffs, handle)

#### 1.2.1b Low Valence

Define relevant valence words by threshold value

In [51]:
# Threshold-based selection: words with FFS_FREQ >= 50, EMO_MEAN at or below
# emo_mean and EMO_STD at or below emo_std.
emo_mean = -2
emo_std = 1

keep_rows = (
    (bawl["FFS_FREQ"] >= 50)
    & (bawl["EMO_MEAN"] <= emo_mean)
    & (bawl["EMO_STD"] <= emo_std)
)
low_valence_words = bawl.loc[keep_rows, "WORD_LOWER"].tolist()

Define relevant valence words by ranking

In [52]:
# Ranking-based selection: among words with FFS_FREQ >= 50 and EMO_STD at or below
# emo_std, keep the bot_n words with the lowest EMO_MEAN (ascending sort, take the head).
bot_n = 25
emo_std = 1

ranking_pool = bawl[(bawl["FFS_FREQ"] >= 50) & (bawl["EMO_STD"] <= emo_std)]
ranked_words = ranking_pool.sort_values("EMO_MEAN")["WORD_LOWER"]
low_valence_words = list(ranked_words)[:bot_n]

Get vectors for valence words

In [53]:
# Resolve every low-valence word to its index in the fanfiction model, then fetch its vector.
valence_words_index = [
    model_ff.key_to_index[selected_word]
    for selected_word in low_valence_words
]
valence_word_vectors = [
    model_ff.get_vector(vocab_index)
    for vocab_index in valence_words_index
]

Normalize vectors to unit length (L2 norm)

In [54]:
# Scale every word vector to unit L2 length before clustering.
unit_length_vectors = normalize(valence_word_vectors, norm='l2')
valence_word_cosine = np.array(unit_length_vectors)

Cluster with Affinity Propagation

In [55]:
# Cluster the normalised low-valence fanfiction word vectors with Affinity Propagation.
# random_state is pinned so that the clusters, and the word list pickled from them
# further down, come out the same on every Restart & Run All.
valence_cos_cluster = AF(affinity='euclidean', random_state=0)
valence_cos_cluster.fit(valence_word_cosine)



AffinityPropagation()

Transform cluster to dictionary

In [56]:
# Group the low-valence words by the cluster label assigned to them.
valence_sem_groups = {}
for position, label in enumerate(valence_cos_cluster.labels_):
    # Labels and words share the same order, so the position links the two.
    valence_sem_groups.setdefault(label, []).append(low_valence_words[position])

print(valence_sem_groups)

{0: ['krieg', 'weltkrieg', 'tod', 'alptraum'], 2: ['foltern', 'mord', 'töten', 'morden', 'lynchen', 'angst', 'zerstören', 'tot', 'gift', 'hassen', 'leiche'], 1: ['massaker', 'folter', 'gewalt', 'qual', 'strafe'], 3: ['todfeind', 'erzfeind'], 4: ['tyrann', 'unmensch', 'herzlos']}


Print relevant information of clusters

In [57]:
# For every cluster of low-valence words: print its members, the average pairwise
# similarity in the fanfiction model and the mean BAWL ratings, then record a
# "suggestion value" together with the member words for the ranking below.
suggestion_values = []

# Cluster labels a, b, c, ... taken from the full 26-letter alphabet.
letters = cycle('abcdefghijklmnopqrstuvwxyz')
candidate_words = np.array(low_valence_words)
for k, let in zip(range(len(valence_cos_cluster.cluster_centers_)), letters):
    class_members = valence_cos_cluster.labels_ == k
    cluster_words = candidate_words[class_members]

    print(let + ": ITEMS " + str(list(cluster_words)))
    # Similarity of every ordered pair of distinct members; each unordered pair is
    # counted twice, which leaves the average unchanged.
    dist_list = [
        model_ff.similarity(member1, member2)
        for member1 in cluster_words
        for member2 in cluster_words
        if member1 != member2
    ]
    if dist_list:
        avg_similarity = sum(dist_list) / len(dist_list)
        print(let + ": AVG SIMILARITY " + str(avg_similarity))

    valence_means = []
    valence_stds = []
    arousal_means = []
    arousal_stds = []
    for word in valence_sem_groups[k]:
        word_row = bawl[bawl["WORD_LOWER"] == word]
        # .item() requires exactly one matching row and avoids the deprecated
        # float(<one-element Series>) conversion.
        valence_means.append(float(word_row["EMO_MEAN"].item()))
        valence_stds.append(float(word_row["EMO_STD"].item()))
        arousal_means.append(float(word_row["AROUSAL_MEAN"].item()))
        arousal_stds.append(float(word_row["AROUSAL_STD"].item()))

    mean_valence = sum(valence_means) / len(valence_means)
    mean_valence_std = sum(valence_stds) / len(valence_stds)
    mean_arousal = sum(arousal_means) / len(arousal_means)
    mean_arousal_std = sum(arousal_stds) / len(arousal_stds)
    print(let + ": MEAN VALENCE " + str(mean_valence))
    print(let + ": MEAN VALENCE STD " + str(mean_valence_std))
    print(let + ": MEAN AROUSAL " + str(mean_arousal))
    print(let + ": MEAN AROUSAL STD " + str(mean_arousal_std))

    if dist_list:
        # Similarity plus absolute mean valence minus mean valence spread, scaled by
        # 1.05 ** cluster size. The size is taken from the low-valence cluster itself
        # rather than from the unrelated high-valence word list.
        suggestion_value = (avg_similarity + abs(mean_valence) - mean_valence_std) * (1.05 ** len(cluster_words))
    else:
        # A single-word cluster has no pairwise similarity; it is scored 0.
        suggestion_value = 0
    suggestion_values.append((suggestion_value, cluster_words))
    print(let + ": SUGGESTION VALUE " + str(suggestion_values[-1][0]))

a: ITEMS ['krieg', 'weltkrieg', 'tod', 'alptraum']
a: AVG SIMILARITY 0.32669998953739804
a: MEAN VALENCE -2.8308823529411766
a: MEAN VALENCE STD 0.5392358281616656
a: MEAN AROUSAL 4.443860877684407
a: MEAN AROUSAL STD 0.8094890873299709
a: SUGGESTION VALUE 3.182616552817918
b: ITEMS ['massaker', 'folter', 'gewalt', 'qual', 'strafe']
b: AVG SIMILARITY 0.3434549242258072
b: MEAN VALENCE -2.68
b: MEAN VALENCE STD 0.6023514378132673
b: MEAN AROUSAL 4.309857978279031
b: MEAN AROUSAL STD 0.7021633726519599
b: SUGGESTION VALUE 3.090009740612795
c: ITEMS ['foltern', 'mord', 'töten', 'morden', 'lynchen', 'angst', 'zerstören', 'tot', 'gift', 'hassen', 'leiche']
c: AVG SIMILARITY 0.29135472567921333
c: MEAN VALENCE -2.607754010695187
c: MEAN VALENCE STD 0.6729715543654162
c: MEAN AROUSAL 4.271375408217514
c: MEAN AROUSAL STD 0.8975550393354602
c: SUGGESTION VALUE 3.807450038956106
d: ITEMS ['todfeind', 'erzfeind']
d: AVG SIMILARITY 0.49981966614723206
d: MEAN VALENCE -2.6
d: MEAN VALENCE STD 0.60

Ranking for suggestion values

In [58]:
# Order the clusters from highest to lowest suggestion value and print them by rank.
suggestion_values = sorted(suggestion_values, key=lambda scored: scored[0], reverse=True)
for position, (score, ranked_cluster) in enumerate(suggestion_values, start=1):
    print(f"{position} {list(ranked_cluster)}")

1 ['foltern', 'mord', 'töten', 'morden', 'lynchen', 'angst', 'zerstören', 'tot', 'gift', 'hassen', 'leiche']
2 ['krieg', 'weltkrieg', 'tod', 'alptraum']
3 ['massaker', 'folter', 'gewalt', 'qual', 'strafe']
4 ['todfeind', 'erzfeind']
5 ['tyrann', 'unmensch', 'herzlos']


In [59]:
# Persist the words of the top-ranked cluster (first entry after sorting).
low_val_ffs = list(suggestion_values[0][1])
low_val_ffs_file = path_pickled + "\\low_val_ffs.pkl"
with open(low_val_ffs_file, "wb") as handle:
    pickle.dump(low_val_ffs, handle)

### 1.2.2 Arousal

#### 1.2.2a High Arousal

Define relevant arousal words by threshold value

In [60]:
# Threshold-based selection: words frequent enough in the fanfiction corpus
# whose mean arousal is at least `arousal_mean` and whose rating spread is at
# most `arousal_std`.
# NOTE(review): `high_arousal_words` is reassigned by the ranking-based
# selection in the following cell, so this result is not what gets clustered.
arousal_mean = 2
arousal_std = 1

high_arousal_mask = (
    (bawl["FFS_FREQ"] >= 50)
    & (bawl["AROUSAL_MEAN"] >= arousal_mean)
    & (bawl["AROUSAL_STD"] <= arousal_std)
)
high_arousal_words = bawl.loc[high_arousal_mask, "WORD_LOWER"].tolist()

Define relevant arousal words by ranking

In [61]:
# Ranking-based selection: keep the `top_n` words with the highest mean
# arousal among those that are frequent enough and have a small rating spread.
top_n = 25
arousal_std = 1

arousal_candidates = bawl[(bawl["FFS_FREQ"] >= 50) & (bawl["AROUSAL_STD"] <= arousal_std)]
words_by_arousal = list(arousal_candidates.sort_values("AROUSAL_MEAN")["WORD_LOWER"])
# Ascending sort, so the highest-arousal words sit at the end of the list.
high_arousal_words = words_by_arousal[-top_n:]

Get vectors for arousal words

In [62]:
# Look up each selected word's vocabulary index in the fanfiction model and
# fetch the embedding vector stored at that index.
arousal_words_index = []
arousal_word_vectors = []
for word in high_arousal_words:
    vocab_idx = model_ff.key_to_index[word]
    arousal_words_index.append(vocab_idx)
    arousal_word_vectors.append(model_ff.get_vector(vocab_idx))

Normalize vectors to unit length (L2 norm)

In [63]:
# Scale every word vector to L2 norm 1 and store the result as a NumPy array.
unit_length_vectors = normalize(arousal_word_vectors, norm='l2')
arousal_word_cosine = np.array(unit_length_vectors)

Cluster with Affinity Propagation

In [64]:
# Cluster the normalised word vectors with Affinity Propagation.
# random_state is pinned so that Restart & Run All reproduces the same
# clustering; without it the estimator's internal random generator is unseeded.
arousal_cos_cluster = AF(affinity='euclidean', random_state=0)
arousal_cos_cluster.fit(arousal_word_cosine)



AffinityPropagation()

Transform cluster to dictionary

In [65]:
# Group the clustered words by their cluster label: {label: [word, ...]}.
# Labels and words are aligned by position.
arousal_sem_groups = {}
for position, cluster_label in enumerate(arousal_cos_cluster.labels_):
    arousal_sem_groups.setdefault(cluster_label, []).append(high_arousal_words[position])

print(arousal_sem_groups)

{2: ['morden', 'mord', 'brutal', 'todfeind', 'massaker', 'attentat'], 4: ['trauma', 'terror', 'unheil', 'alptraum', 'krieg', 'weltkrieg'], 5: ['furcht', 'atemnot', 'ekstase', 'bestie', 'panik'], 3: ['schrei', 'alarm', 'erdbeben', 'notfall'], 0: ['erotik'], 1: ['hassen', 'foltern', 'folter']}


Print relevant information of clusters

In [66]:
# For every cluster: print its words, the average pairwise similarity, and the
# mean BAWL valence/arousal ratings, then compute a "suggestion value" that is
# later used to rank the clusters. Results are collected in `suggestion_values`
# as (value, words) tuples.
suggestion_values = []

# Letters used to label clusters in the printout (full alphabet; the earlier
# literal skipped 'w').
letters = cycle('abcdefghijklmnopqrstuvwxyz')
for k, let in zip(range(len(arousal_cos_cluster.cluster_centers_)), letters):
    class_members = arousal_cos_cluster.labels_ == k
    members = np.array(high_arousal_words)[class_members]

    print(let + ": ITEMS " + str(list(members)))

    # Similarity of every ordered pair of distinct members (each unordered
    # pair is therefore counted twice, which leaves the average unchanged).
    dist_list = []
    for member1 in members:
        for member2 in members:
            if member1 != member2:
                dist_list.append(model_ff.similarity(member1,member2))

    avg_similarity = None
    if dist_list:
        avg_similarity = sum(dist_list) / len(dist_list)
        print(let + ": AVG SIMILARITY " + str(avg_similarity))

    valence_means = []
    valence_stds = []
    arousal_means = []
    arousal_stds = []
    for word in arousal_sem_groups[k]:
        word_row = bawl[bawl["WORD_LOWER"] == word]
        # Extract the scalar explicitly: calling float() on a one-element
        # Series is deprecated in recent pandas versions.
        valence_means.append(float(word_row["EMO_MEAN"].iloc[0]))
        valence_stds.append(float(word_row["EMO_STD"].iloc[0]))
        arousal_means.append(float(word_row["AROUSAL_MEAN"].iloc[0]))
        arousal_stds.append(float(word_row["AROUSAL_STD"].iloc[0]))

    mean_valence = sum(valence_means) / len(valence_means)
    mean_valence_std = sum(valence_stds) / len(valence_stds)
    mean_arousal = sum(arousal_means) / len(arousal_means)
    mean_arousal_std = sum(arousal_stds) / len(arousal_stds)

    print(let + ": MEAN VALENCE " + str(mean_valence))
    print(let + ": MEAN VALENCE STD " + str(mean_valence_std))
    print(let + ": MEAN AROUSAL " + str(mean_arousal))
    print(let + ": MEAN AROUSAL STD " + str(mean_arousal_std))

    # Suggestion value: (similarity + mean arousal - mean arousal std), scaled
    # by 1.05 per cluster member. A cluster without any pair (single member)
    # has no similarity and gets 0; any other error is no longer swallowed.
    if avg_similarity is not None:
        suggestion_value = (avg_similarity + mean_arousal - mean_arousal_std)*(1.05**(len(members)))
    else:
        suggestion_value = 0
    suggestion_values.append((suggestion_value,members))
    print(let + ": SUGGESTION VALUE " + str(suggestion_values[-1][0]))

a: ITEMS ['erotik']
a: MEAN VALENCE 2.3
a: MEAN VALENCE STD 0.483045891539648
a: MEAN AROUSAL 4.4375
a: MEAN AROUSAL STD 0.8139410298049853
a: SUGGESTION VALUE 0
b: ITEMS ['hassen', 'foltern', 'folter']
b: AVG SIMILARITY 0.3543806274731954
b: MEAN VALENCE -2.6999999999999997
b: MEAN VALENCE STD 0.5510494533333333
b: MEAN AROUSAL 4.524366471734893
b: MEAN AROUSAL STD 0.7406004551268303
b: SUGGESTION VALUE 4.790422008854566
c: ITEMS ['morden', 'mord', 'brutal', 'todfeind', 'massaker', 'attentat']
c: AVG SIMILARITY 0.3539106547832489
c: MEAN VALENCE -2.566666666666667
c: MEAN VALENCE STD 0.5612380832003703
c: MEAN AROUSAL 4.534342391927531
c: MEAN AROUSAL STD 0.803956688095666
c: SUGGESTION VALUE 5.473347745200678
d: ITEMS ['schrei', 'alarm', 'erdbeben', 'notfall']
d: AVG SIMILARITY 0.2482155834635099
d: MEAN VALENCE -1.6
d: MEAN VALENCE STD 0.8489156716345836
d: MEAN AROUSAL 4.466374269005848
d: MEAN AROUSAL STD 0.7250215146529922
d: SUGGESTION VALUE 4.849345249417905
e: ITEMS ['trauma',

Ranking for suggestion values

In [67]:
# Order the clusters from highest to lowest suggestion value, then list
# the words of each cluster next to its 1-based rank.
suggestion_values = sorted(suggestion_values, key=lambda entry: entry[0], reverse=True)
for position, (_score, cluster_words) in enumerate(suggestion_values, start=1):
    print(str(position) + " " + str(list(cluster_words)))

1 ['trauma', 'terror', 'unheil', 'alptraum', 'krieg', 'weltkrieg']
2 ['morden', 'mord', 'brutal', 'todfeind', 'massaker', 'attentat']
3 ['furcht', 'atemnot', 'ekstase', 'bestie', 'panik']
4 ['schrei', 'alarm', 'erdbeben', 'notfall']
5 ['hassen', 'foltern', 'folter']
6 ['erotik']


In [68]:
# Persist the words of the top-ranked cluster (first entry after sorting).
high_arousal_ffs = list(suggestion_values[0][1])
high_arousal_ffs_file = path_pickled + "\\high_arousal_ffs.pkl"
with open(high_arousal_ffs_file, "wb") as handle:
    pickle.dump(high_arousal_ffs, handle)

#### 1.2.2b Low Arousal

Define relevant arousal words by threshold value

In [69]:
# Threshold-based selection: words frequent enough in the fanfiction corpus
# whose mean arousal is at most `arousal_mean` and whose rating spread is at
# most `arousal_std`.
# NOTE(review): `low_arousal_words` is reassigned by the ranking-based
# selection in the following cell, so this result is not what gets clustered.
arousal_mean = 2
arousal_std = 1

low_arousal_mask = (
    (bawl["FFS_FREQ"] >= 50)
    & (bawl["AROUSAL_MEAN"] <= arousal_mean)
    & (bawl["AROUSAL_STD"] <= arousal_std)
)
low_arousal_words = bawl.loc[low_arousal_mask, "WORD_LOWER"].tolist()

Define relevant arousal words by ranking

In [70]:
# Ranking-based selection: keep the `bot_n` words with the lowest mean arousal
# among those that are frequent enough and have a small rating spread.
bot_n = 25
arousal_std = 1

arousal_candidates = bawl[(bawl["FFS_FREQ"] >= 50) & (bawl["AROUSAL_STD"] <= arousal_std)]
words_by_arousal = list(arousal_candidates.sort_values("AROUSAL_MEAN")["WORD_LOWER"])
# Ascending sort, so the lowest-arousal words sit at the start of the list.
low_arousal_words = words_by_arousal[:bot_n]

Get vectors for arousal words

In [71]:
# Look up each selected word's vocabulary index in the fanfiction model and
# fetch the embedding vector stored at that index.
arousal_words_index = []
arousal_word_vectors = []
for word in low_arousal_words:
    vocab_idx = model_ff.key_to_index[word]
    arousal_words_index.append(vocab_idx)
    arousal_word_vectors.append(model_ff.get_vector(vocab_idx))

Normalize vectors to unit length (L2 norm)

In [72]:
# Scale every word vector to L2 norm 1 and store the result as a NumPy array.
unit_length_vectors = normalize(arousal_word_vectors, norm='l2')
arousal_word_cosine = np.array(unit_length_vectors)

Cluster with Affinity Propagation

In [73]:
# Cluster the normalised word vectors with Affinity Propagation.
# random_state is pinned so that Restart & Run All reproduces the same
# clustering; without it the estimator's internal random generator is unseeded.
arousal_cos_cluster = AF(affinity='euclidean', random_state=0)
arousal_cos_cluster.fit(arousal_word_cosine)



AffinityPropagation()

Transform cluster to dictionary

In [74]:
# Group the clustered words by their cluster label: {label: [word, ...]}.
# Labels and words are aligned by position.
arousal_sem_groups = {}
for position, cluster_label in enumerate(arousal_cos_cluster.labels_):
    arousal_sem_groups.setdefault(cluster_label, []).append(low_arousal_words[position])

print(arousal_sem_groups)

{0: ['schlaf', 'erholung', 'balsam', 'friede'], 2: ['still', 'pause', 'wenig'], 5: ['wiege', 'murmel', 'harfe'], 3: ['allee', 'buche', 'wiese', 'birke', 'linde', 'baum'], 4: ['aquarium', 'beutel', 'schale', 'kamin'], 1: ['weich', 'seide', 'zaghaft'], 6: ['zahm', 'passiv']}


Print relevant information of clusters

In [75]:
# For every cluster: print its words, the average pairwise similarity, and the
# mean BAWL valence/arousal ratings, then compute a "suggestion value" that is
# later used to rank the clusters. Results are collected in `suggestion_values`
# as (value, words) tuples.
suggestion_values = []

# Letters used to label clusters in the printout (full alphabet; the earlier
# literal skipped 'w').
letters = cycle('abcdefghijklmnopqrstuvwxyz')
for k, let in zip(range(len(arousal_cos_cluster.cluster_centers_)), letters):
    class_members = arousal_cos_cluster.labels_ == k
    members = np.array(low_arousal_words)[class_members]

    print(let + ": ITEMS " + str(list(members)))

    # Similarity of every ordered pair of distinct members (each unordered
    # pair is therefore counted twice, which leaves the average unchanged).
    dist_list = []
    for member1 in members:
        for member2 in members:
            if member1 != member2:
                dist_list.append(model_ff.similarity(member1,member2))

    avg_similarity = None
    if dist_list:
        avg_similarity = sum(dist_list) / len(dist_list)
        print(let + ": AVG SIMILARITY " + str(avg_similarity))

    valence_means = []
    valence_stds = []
    arousal_means = []
    arousal_stds = []
    for word in arousal_sem_groups[k]:
        word_row = bawl[bawl["WORD_LOWER"] == word]
        # Extract the scalar explicitly: calling float() on a one-element
        # Series is deprecated in recent pandas versions.
        valence_means.append(float(word_row["EMO_MEAN"].iloc[0]))
        valence_stds.append(float(word_row["EMO_STD"].iloc[0]))
        arousal_means.append(float(word_row["AROUSAL_MEAN"].iloc[0]))
        arousal_stds.append(float(word_row["AROUSAL_STD"].iloc[0]))

    mean_valence = sum(valence_means) / len(valence_means)
    mean_valence_std = sum(valence_stds) / len(valence_stds)
    mean_arousal = sum(arousal_means) / len(arousal_means)
    mean_arousal_std = sum(arousal_stds) / len(arousal_stds)

    print(let + ": MEAN VALENCE " + str(mean_valence))
    print(let + ": MEAN VALENCE STD " + str(mean_valence_std))
    print(let + ": MEAN AROUSAL " + str(mean_arousal))
    print(let + ": MEAN AROUSAL STD " + str(mean_arousal_std))

    # Suggestion value: (similarity + (4 - mean arousal) - mean arousal std),
    # scaled by 1.05 per cluster member. Subtracting the mean from 4 makes
    # lower arousal score higher.
    # NOTE(review): the choice of the constant 4 is not explained here - confirm.
    # A cluster without any pair (single member) has no similarity and gets 0;
    # any other error is no longer swallowed.
    if avg_similarity is not None:
        suggestion_value = (avg_similarity + (4 - mean_arousal) - mean_arousal_std)*(1.05**(len(members)))
    else:
        suggestion_value = 0
    suggestion_values.append((suggestion_value,members))
    print(let + ": SUGGESTION VALUE " + str(suggestion_values[-1][0]))

a: ITEMS ['schlaf', 'erholung', 'balsam', 'friede']
a: AVG SIMILARITY 0.26079168791572255
a: MEAN VALENCE 1.9749999999999996
a: MEAN VALENCE STD 0.8620923284305765
a: MEAN AROUSAL 1.2787925696594429
a: MEAN AROUSAL STD 0.6286330724775046
a: SUGGESTION VALUE 2.8605311371818876
b: ITEMS ['weich', 'seide', 'zaghaft']
b: AVG SIMILARITY 0.2685020789504051
b: MEAN VALENCE 0.6833333333333332
b: MEAN VALENCE STD 0.6742425666666666
b: MEAN AROUSAL 1.4375644994840042
b: MEAN AROUSAL STD 0.7186285995138822
b: SUGGESTION VALUE 2.4452616829175344
c: ITEMS ['still', 'pause', 'wenig']
c: AVG SIMILARITY 0.2568315714597702
c: MEAN VALENCE 0.2833333333333333
c: MEAN VALENCE STD 0.9889026933333334
c: MEAN AROUSAL 1.390142021720969
c: MEAN AROUSAL STD 0.6106310947327894
c: SUGGESTION VALUE 2.611669668976335
d: ITEMS ['allee', 'buche', 'wiese', 'birke', 'linde', 'baum']
d: AVG SIMILARITY 0.39870959222316743
d: MEAN VALENCE 1.325
d: MEAN VALENCE STD 1.056007904946277
d: MEAN AROUSAL 1.4131693613117762
d: ME

Ranking for suggestion values

In [76]:
# Order the clusters from highest to lowest suggestion value, then list
# the words of each cluster next to its 1-based rank.
suggestion_values = sorted(suggestion_values, key=lambda entry: entry[0], reverse=True)
for position, (_score, cluster_words) in enumerate(suggestion_values, start=1):
    print(str(position) + " " + str(list(cluster_words)))

1 ['allee', 'buche', 'wiese', 'birke', 'linde', 'baum']
2 ['schlaf', 'erholung', 'balsam', 'friede']
3 ['aquarium', 'beutel', 'schale', 'kamin']
4 ['wiege', 'murmel', 'harfe']
5 ['still', 'pause', 'wenig']
6 ['weich', 'seide', 'zaghaft']
7 ['zahm', 'passiv']


In [77]:
# Persist the words of the top-ranked cluster (first entry after sorting).
low_arousal_ffs = list(suggestion_values[0][1])
low_arousal_ffs_file = path_pickled + "\\low_arousal_ffs.pkl"
with open(low_arousal_ffs_file, "wb") as handle:
    pickle.dump(low_arousal_ffs, handle)