# Emotions GAM

Notebook para obtener las emociones que han despertado a los usuarios los cuadros de GAM. En este caso procedemos a:

1. Obtener la lista de emojis usados por los usuarios.
2. Cambiar los emojins por emociones de la escala de Plunchkin.
3. Crear el dataset final de emociones.

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import numpy as np

import emojis

In [3]:
from context import community_module
# others
#from community_module.visualization.gephiVisualization import GephiVisualization

In [233]:
# algorithms
from community_module.community_detection.explainedCommunitiesDetection import ExplainedCommunitiesDetection
from community_module.community_detection.agglomerativeCommunityDetection import AgglomerativeCommunityDetection
from community_module.community_detection.kmedoidsCommunityDetection import KmedoidsCommunityDetection

In [234]:
# similarity
from sklearn.metrics.pairwise import cosine_similarity
from community_module.similarity.artworkEmotionSimilarity import ArtworkEmotionSimilarity

In [235]:
interactions_csv_df = pd.read_csv('../../data/GAM/clean_data/interactions.csv')
#users_csv_df = pd.read_csv('../../data/GAM/clean_data/users.csv')
users_csv_df = pd.read_csv('../../data/GAM/clean_data/usersStandardGenre.csv')

artworks_csv_df = pd.read_csv('../../data/GAM/clean_data/artwors.csv')


# Me quedo solo con las columnas user, artwork y q4 (emojis)
emojis_df = interactions_csv_df[['user', 'artwork', 'q4']]
emojis_df.head()

Unnamed: 0,user,artwork,q4
0,2,1,
1,2,2,
2,2,3,
3,2,4,
4,2,5,


In [236]:
users_csv_df

Unnamed: 0,id,gender,age,relationship_art,like_museums,languages
0,1,Female,34,[2],1,EN
1,2,Male,53,[2],1,EN
2,3,Male,33,[1],1,EN
3,4,Female,44,[3],2,EN
4,5,Male,37,[2],1,EN
...,...,...,...,...,...,...
121,122,Female,42.0,[2],2,IT
122,123,Female,24.0,[2],2,IT
123,124,Female,51.0,[3],2,IT
124,125,Female,47.0,[3],2,IT


In [237]:
artworks_csv_df

Unnamed: 0,id,title,url
0,1,Aracne,https://www.gamtorino.it/it/le-collezioni/cata...
1,2,Marina,https://www.gamtorino.it/en/node/39384
2,3,Asfissia! (parte sinistra),https://www.gamtorino.it/en/node/39175
3,4,La Sirena (Sirena) (Abisso verde),https://www.gamtorino.it/it/le-collezioni/cata...
4,5,Mercato Vecchio di Firenze,https://www.gamtorino.it/it/le-collezioni/cata...
5,6,Aprile,https://www.gamtorino.it/en/node/35215
6,7,Daphne (Daphne a Pavarolo),https://www.gamtorino.it/en/node/35323
7,8,La ragazza rossa (Testa di donna dai capelli r...,https://www.gamtorino.it/it/le-collezioni/cata...
8,9,Estate (L'amaca),https://www.gamtorino.it/en/node/35380
9,10,Gotine rosse,https://www.gamtorino.it/en/node/35221


In [238]:
# Eliminamos filas NaN
emojis_df = emojis_df[~emojis_df.isna()['q4']]
emojis_df

Unnamed: 0,user,artwork,q4
36,5,1,😍 feelings of love or infatuation
37,5,2,relaxed
38,5,3,😕 confused
39,5,4,😕 confused
40,5,5,🙂 slightly happy
...,...,...,...
1479,125,4,🙂 leggermente felice
1488,126,1,"😕 confusione, 🤔 pensieroso"
1489,126,2,tranquillità
1490,126,3,"😕 confusione, 😱 paura"


In [239]:
# Agrupamos por emojins
emojis_df.groupby(by='q4').count().sort_values(by='user', ascending=False)

Unnamed: 0_level_0,user,artwork
q4,Unnamed: 1_level_1,Unnamed: 2_level_1
🤔 thoughtful,27,27
🙂 slightly happy,24,24
🙂 leggermente felice,23,23
😄 felice,22,22
🤔 pensieroso,22,22
...,...,...
investigative,1,1
inquietata,1,1
fire,1,1
dolcezza,1,1


In [240]:
# extract emoji(s) from text
def get_emoji(text):
    # remove \u200d character in excel files
    text = text.replace('\u200d','')
    return emojis.get(text)

In [241]:
emojis_df['emoji'] = emojis_df['q4'].apply(get_emoji)
emojis_df.head()

Unnamed: 0,user,artwork,q4,emoji
36,5,1,😍 feelings of love or infatuation,{😍}
37,5,2,relaxed,{}
38,5,3,😕 confused,{😕}
39,5,4,😕 confused,{😕}
40,5,5,🙂 slightly happy,{🙂}


In [242]:
def has_emoji(emoji_column):
    return len(emoji_column) > 0

In [243]:
all_emojis = set().union(*list(emojis_df[emojis_df['emoji'].apply(has_emoji)]['emoji'].values))
all_emojis = list(all_emojis)
all_emojis

['🤐',
 '😢',
 '♀️',
 '🌡️',
 '🙁',
 '😄',
 '😍',
 '😌',
 '😴',
 '🧐',
 '🤫',
 '😮',
 '😱',
 '🙂',
 '🥰',
 '👩',
 '😬',
 '😎',
 '🙄',
 '💋',
 '👹',
 '☺️',
 '🤔',
 '😊',
 '😏',
 '🧠',
 '😡',
 '🤯',
 '😔',
 '🥺',
 '🛌',
 '❤️',
 '😥',
 '🤷',
 '🤢',
 '✨',
 '😂',
 '😕']

In [244]:
['☺️',
 '🌡️',
 '🤔',
 '🤐',
 '👹',
 '😔',
 '🤷',
 '🧐',
 '😢',
 '🙂',
 '😄',
 '✨',
 '😬',
 '🥺',
 '🤫',
 '🧠',
 '😎',
 '😊',
 '🥰',
 '❤️',
 '😱',
 '😮',
 '🙁',
 '🤯',
 '👩',
 '😴',
 '🙄',
 '🛌',
 '😡',
 '😥',
 '😕',
 '😏',
 '🤢',
 '😍',
 '😂',
 '♀️',
 '😌',
 '💋']

['☺️',
 '🌡️',
 '🤔',
 '🤐',
 '👹',
 '😔',
 '🤷',
 '🧐',
 '😢',
 '🙂',
 '😄',
 '✨',
 '😬',
 '🥺',
 '🤫',
 '🧠',
 '😎',
 '😊',
 '🥰',
 '❤️',
 '😱',
 '😮',
 '🙁',
 '🤯',
 '👩',
 '😴',
 '🙄',
 '🛌',
 '😡',
 '😥',
 '😕',
 '😏',
 '🤢',
 '😍',
 '😂',
 '♀️',
 '😌',
 '💋']

In [245]:
all_emojis[0]

'🤐'

In [246]:
# Voy a convertirlos a emociones con el dataset que se proporciona aquí https://github.com/abushoeb/EmoTag
emotags_df = pd.read_csv('../../data/GAM/clean_data/EmoTag1200-scores.csv')
#emotags_df = pd.read_csv('../../data/GAM/clean_data/EmoTag1200-scores-Copy1.csv')
emotags_df

Unnamed: 0,unicode,emoji,name,anger,anticipation,disgust,fear,joy,sadness,surprise,trust
0,1F308,🌈,rainbow,0.00,0.28,0.00,0.00,0.69,0.06,0.22,0.33
1,1F319,🌙,crescent moon,0.00,0.31,0.00,0.00,0.25,0.00,0.06,0.25
2,1F31A,🌚,new moon face,0.06,0.08,0.17,0.06,0.42,0.19,0.06,0.11
3,1F31E,🌞,sun with face,0.00,0.22,0.00,0.00,0.78,0.00,0.11,0.22
4,1F31F,🌟,glowing star,0.00,0.28,0.00,0.00,0.53,0.00,0.25,0.31
...,...,...,...,...,...,...,...,...,...,...,...
145,2757,❗,exclamation mark,0.44,0.42,0.31,0.42,0.08,0.17,0.81,0.11
146,2764,❤,red heart,0.00,0.36,0.00,0.00,0.69,0.00,0.14,0.67
147,27A1,➡,right arrow,0.00,0.06,0.00,0.00,0.00,0.00,0.00,0.22
148,2B05,⬅,left arrow,0.17,0.14,0.17,0.14,0.00,0.14,0.03,0.06


In [247]:
emotags_df[emotags_df['emoji'] == all_emojis[2]]

Unnamed: 0,unicode,emoji,name,anger,anticipation,disgust,fear,joy,sadness,surprise,trust


In [248]:
emojis_found = emotags_df[emotags_df['emoji'].isin(all_emojis)]['emoji'].values

In [249]:
print('Emojis que no se encuentran en el sentiment analysis')
set(all_emojis).difference(set(emojis_found))

Emojis que no se encuentran en el sentiment analysis


{'☺️',
 '♀️',
 '❤️',
 '🌡️',
 '👩',
 '👹',
 '😮',
 '🙁',
 '🙂',
 '🙄',
 '🛌',
 '🤐',
 '🤔',
 '🤢',
 '🤫',
 '🤯',
 '🤷',
 '🥰',
 '🥺',
 '🧐',
 '🧠'}

In [250]:
print('Emojis que si se encuentran en el sentiment analysis')
set(emojis_found)

Emojis que si se encuentran en el sentiment analysis


{'✨',
 '💋',
 '😂',
 '😄',
 '😊',
 '😌',
 '😍',
 '😎',
 '😏',
 '😔',
 '😕',
 '😡',
 '😢',
 '😥',
 '😬',
 '😱',
 '😴'}

In [251]:
emotags_df[emotags_df['emoji'].isin(emojis_found)]

Unnamed: 0,unicode,emoji,name,anger,anticipation,disgust,fear,joy,sadness,surprise,trust
37,1F48B,💋,kiss mark,0.0,0.28,0.0,0.0,0.78,0.0,0.19,0.69
67,1F602,😂,face with tears of joy,0.0,0.17,0.06,0.06,0.94,0.0,0.33,0.22
69,1F604,😄,grinning face with smiling eyes,0.0,0.36,0.0,0.0,0.86,0.0,0.28,0.28
75,1F60A,😊,smiling face with smiling eyes,0.0,0.42,0.0,0.0,0.92,0.0,0.33,0.47
77,1F60C,😌,relieved face,0.0,0.33,0.0,0.11,0.81,0.0,0.22,0.61
78,1F60D,😍,smiling face with heart-eyes,0.0,0.31,0.0,0.0,0.83,0.0,0.5,0.5
79,1F60E,😎,smiling face with sunglasses,0.0,0.22,0.0,0.0,0.75,0.0,0.06,0.33
80,1F60F,😏,smirking face,0.22,0.33,0.28,0.14,0.22,0.22,0.11,0.22
85,1F614,😔,pensive face,0.25,0.22,0.31,0.28,0.0,0.72,0.19,0.14
86,1F615,😕,confused face,0.19,0.42,0.36,0.36,0.0,0.39,0.28,0.17


In [252]:
not_found_emojis = list(set(all_emojis).difference(set(emojis_found)))
for em in not_found_emojis:
    data = emojis.db.get_emoji_by_code(em)
    #data = emoji.emojize(em)
    print(em, 'tags:', data.tags, 'aliases:', data.aliases)
#    print(emojis.db.get_emoji_by_code(em))


🤐 tags: ['silence', 'hush'] aliases: ['zipper_mouth_face']
♀️ tags: [] aliases: ['female_sign']
🌡️ tags: [] aliases: ['thermometer']
🙁 tags: [] aliases: ['slightly_frowning_face']
🧐 tags: [] aliases: ['monocle_face']
🤫 tags: ['silence', 'quiet'] aliases: ['shushing_face']
😮 tags: ['surprise', 'impressed', 'wow'] aliases: ['open_mouth']
🙂 tags: [] aliases: ['slightly_smiling_face']
🥰 tags: ['love'] aliases: ['smiling_face_with_three_hearts']
👩 tags: ['girls'] aliases: ['woman']
🙄 tags: [] aliases: ['roll_eyes']
👹 tags: ['monster'] aliases: ['japanese_ogre']
☺️ tags: ['blush', 'pleased'] aliases: ['relaxed']
🤔 tags: [] aliases: ['thinking']
🧠 tags: [] aliases: ['brain']
🤯 tags: ['mind', 'blown'] aliases: ['exploding_head']
🥺 tags: ['puppy', 'eyes'] aliases: ['pleading_face']
🛌 tags: [] aliases: ['sleeping_bed']
❤️ tags: ['love'] aliases: ['heart']
🤷 tags: [] aliases: ['shrug']
🤢 tags: ['sick', 'barf', 'disgusted'] aliases: ['nauseated_face']


In [253]:
data = emojis.db.get_emoji_by_code('☺️').aliases

In [254]:
data

['relaxed']

In [255]:
{
    '🥰': 'Love',
    '🧐': ''
}

{'🥰': 'Love', '🧐': ''}

In [256]:
#--------------------------------------------------------------------------------------------------------------------------
#    Prepare data for clustering algorithm (similarity: artworkEmotionSimilarity)
#--------------------------------------------------------------------------------------------------------------------------

In [257]:
emojis_df

Unnamed: 0,user,artwork,q4,emoji
36,5,1,😍 feelings of love or infatuation,{😍}
37,5,2,relaxed,{}
38,5,3,😕 confused,{😕}
39,5,4,😕 confused,{😕}
40,5,5,🙂 slightly happy,{🙂}
...,...,...,...,...
1479,125,4,🙂 leggermente felice,{🙂}
1488,126,1,"😕 confusione, 🤔 pensieroso","{🤔, 😕}"
1489,126,2,tranquillità,{}
1490,126,3,"😕 confusione, 😱 paura","{😱, 😕}"


In [258]:
emojis_df

Unnamed: 0,user,artwork,q4,emoji
36,5,1,😍 feelings of love or infatuation,{😍}
37,5,2,relaxed,{}
38,5,3,😕 confused,{😕}
39,5,4,😕 confused,{😕}
40,5,5,🙂 slightly happy,{🙂}
...,...,...,...,...
1479,125,4,🙂 leggermente felice,{🙂}
1488,126,1,"😕 confusione, 🤔 pensieroso","{🤔, 😕}"
1489,126,2,tranquillità,{}
1490,126,3,"😕 confusione, 😱 paura","{😱, 😕}"


In [259]:
"""
"""
# Change artwork to name of artwork
def get_name(artwork_id):
    return artworks_csv_df['title'] 

names_arts = emojis_df.apply(lambda row: artworks_csv_df[artworks_csv_df['id'] == row['artwork']].title.values[0], axis=1)
names_arts

36                                 Aracne
37                                 Marina
38             Asfissia! (parte sinistra)
39      La Sirena (Sirena) (Abisso verde)
40             Mercato Vecchio di Firenze
                      ...                
1479    La Sirena (Sirena) (Abisso verde)
1488                               Aracne
1489                               Marina
1490           Asfissia! (parte sinistra)
1491    La Sirena (Sirena) (Abisso verde)
Length: 752, dtype: object

In [260]:
emojis_df['artwork'] = names_arts
emojis_df

Unnamed: 0,user,artwork,q4,emoji
36,5,Aracne,😍 feelings of love or infatuation,{😍}
37,5,Marina,relaxed,{}
38,5,Asfissia! (parte sinistra),😕 confused,{😕}
39,5,La Sirena (Sirena) (Abisso verde),😕 confused,{😕}
40,5,Mercato Vecchio di Firenze,🙂 slightly happy,{🙂}
...,...,...,...,...
1479,125,La Sirena (Sirena) (Abisso verde),🙂 leggermente felice,{🙂}
1488,126,Aracne,"😕 confusione, 🤔 pensieroso","{🤔, 😕}"
1489,126,Marina,tranquillità,{}
1490,126,Asfissia! (parte sinistra),"😕 confusione, 😱 paura","{😱, 😕}"


In [261]:
users_csv_df

Unnamed: 0,id,gender,age,relationship_art,like_museums,languages
0,1,Female,34,[2],1,EN
1,2,Male,53,[2],1,EN
2,3,Male,33,[1],1,EN
3,4,Female,44,[3],2,EN
4,5,Male,37,[2],1,EN
...,...,...,...,...,...,...
121,122,Female,42.0,[2],2,IT
122,123,Female,24.0,[2],2,IT
123,124,Female,51.0,[3],2,IT
124,125,Female,47.0,[3],2,IT


In [262]:
# Separate lists of emojis in different columns
emojis_df2 = emojis_df.set_index(['user', 'artwork', 'q4']).apply(lambda x: x.explode()).reset_index()
emojis_df2

Unnamed: 0,user,artwork,q4,emoji
0,5,Aracne,😍 feelings of love or infatuation,😍
1,5,Marina,relaxed,
2,5,Asfissia! (parte sinistra),😕 confused,😕
3,5,La Sirena (Sirena) (Abisso verde),😕 confused,😕
4,5,Mercato Vecchio di Firenze,🙂 slightly happy,🙂
...,...,...,...,...
997,126,Aracne,"😕 confusione, 🤔 pensieroso",😕
998,126,Marina,tranquillità,
999,126,Asfissia! (parte sinistra),"😕 confusione, 😱 paura",😱
1000,126,Asfissia! (parte sinistra),"😕 confusione, 😱 paura",😕


In [263]:
# User sample with 2+ emotions in the same artwork
emojis_df2[(emojis_df2['user'] == 126)]

Unnamed: 0,user,artwork,q4,emoji
996,126,Aracne,"😕 confusione, 🤔 pensieroso",🤔
997,126,Aracne,"😕 confusione, 🤔 pensieroso",😕
998,126,Marina,tranquillità,
999,126,Asfissia! (parte sinistra),"😕 confusione, 😱 paura",😱
1000,126,Asfissia! (parte sinistra),"😕 confusione, 😱 paura",😕
1001,126,La Sirena (Sirena) (Abisso verde),😍 sentimenti di amore o infatuazione,😍


In [264]:
# User x artwork table (values: list of emojis)
user_artwork_emojis_df = pd.pivot_table(emojis_df, values='emoji', index='user', columns='artwork', fill_value=0, aggfunc=np.sum)
user_artwork_emojis_df

artwork,Aprile,Aracne,Asfissia! (parte sinistra),Autoritratto in forma di gufo,Daphne (Daphne a Pavarolo),Estate (L'amaca),Gotine rosse,La Sirena (Sirena) (Abisso verde),La ragazza rossa (Testa di donna dai capelli rossi),"Lo specchio della vita (E ciò che l'una fa, e le altre fanno)",Marina,Mercato Vecchio di Firenze
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
5,{😔},{😍},{😕},0,{},0,0,{😕},{🙂},0,{},{🙂}
6,{😔},{😱},{😕},"{🤢, 😱}",{🤔},{😍},{😴},{😍},{},{😴},{🤔},{🙂}
7,"{🤔, 🙂}","{😮, 🤔}","{😔, 🤢}",0,"{🤔, 😌}",0,0,{🙂},"{😴, 🙂}",0,"{🤔, 🙂}",{🙂}
8,{😴},{🙂},{😱},0,{😴},0,0,{😍},{😕},0,{😄},{😄}
9,"{🤔, 😔}","{😮, 🤔, 😱}","{🤔, 😕}","{😱, 😕}",{😍},{🙂},{😡},"{🤔, 😱}",{😱},{🙂},"{🤔, 🙂}","{🤔, 😕}"
...,...,...,...,...,...,...,...,...,...,...,...,...
122,"{😮, 🤔, 🙂}",{😍},"{🤔, 😔, 😕}","{😮, 😔, 🤔}","{🤔, 🙂}","{😍, 🙂}",{🤔},"{🤔, 😍}","{😍, 🙂}","{🤔, 🙂}",{🙂},{🤔}
123,{😔},{🙂},{😔},"{😮, 🤔, 😱}",{😢},{😍},"{🤔, 😡}",{😮},{😄},{😄},{😄},{🙂}
124,{🤔},{🤔},{🤫},0,{},0,0,{😱},{😱},{😄},0,{🙂}
125,0,{🙂},{🤔},0,0,0,0,{🙂},0,0,{🙂},0


In [265]:
# Try to get user x artwork (values: list of emotions based on the emojis)
# Just a test to see it returns the same result as emojis_df (not used), that means emojis_df2 is not really needed either
#user_artwork_emotions = pd.pivot_table(emojis_df2, values='emoji', index='user', columns='artwork', fill_value=0, aggfunc=np.sum)
# user_artwork_emotions

In [266]:
emotags_df

Unnamed: 0,unicode,emoji,name,anger,anticipation,disgust,fear,joy,sadness,surprise,trust
0,1F308,🌈,rainbow,0.00,0.28,0.00,0.00,0.69,0.06,0.22,0.33
1,1F319,🌙,crescent moon,0.00,0.31,0.00,0.00,0.25,0.00,0.06,0.25
2,1F31A,🌚,new moon face,0.06,0.08,0.17,0.06,0.42,0.19,0.06,0.11
3,1F31E,🌞,sun with face,0.00,0.22,0.00,0.00,0.78,0.00,0.11,0.22
4,1F31F,🌟,glowing star,0.00,0.28,0.00,0.00,0.53,0.00,0.25,0.31
...,...,...,...,...,...,...,...,...,...,...,...
145,2757,❗,exclamation mark,0.44,0.42,0.31,0.42,0.08,0.17,0.81,0.11
146,2764,❤,red heart,0.00,0.36,0.00,0.00,0.69,0.00,0.14,0.67
147,27A1,➡,right arrow,0.00,0.06,0.00,0.00,0.00,0.00,0.00,0.22
148,2B05,⬅,left arrow,0.17,0.14,0.17,0.14,0.00,0.14,0.03,0.06


In [267]:
# Option 1 (simplified): assign the emotion with the highest value to each emoji
# https://stackoverflow.com/questions/36597732/find-column-with-the-highest-value-pandas
df = emotags_df
emotags_emotions_df = df.loc[:, ~df.columns.isin(['unicode', 'emoji','name','dominantEmotion'])]
emotags_df['dominantEmotion'] = emotags_emotions_df.idxmax(axis='columns')
emotags_df

Unnamed: 0,unicode,emoji,name,anger,anticipation,disgust,fear,joy,sadness,surprise,trust,dominantEmotion
0,1F308,🌈,rainbow,0.00,0.28,0.00,0.00,0.69,0.06,0.22,0.33,joy
1,1F319,🌙,crescent moon,0.00,0.31,0.00,0.00,0.25,0.00,0.06,0.25,anticipation
2,1F31A,🌚,new moon face,0.06,0.08,0.17,0.06,0.42,0.19,0.06,0.11,joy
3,1F31E,🌞,sun with face,0.00,0.22,0.00,0.00,0.78,0.00,0.11,0.22,joy
4,1F31F,🌟,glowing star,0.00,0.28,0.00,0.00,0.53,0.00,0.25,0.31,joy
...,...,...,...,...,...,...,...,...,...,...,...,...
145,2757,❗,exclamation mark,0.44,0.42,0.31,0.42,0.08,0.17,0.81,0.11,surprise
146,2764,❤,red heart,0.00,0.36,0.00,0.00,0.69,0.00,0.14,0.67,joy
147,27A1,➡,right arrow,0.00,0.06,0.00,0.00,0.00,0.00,0.00,0.22,trust
148,2B05,⬅,left arrow,0.17,0.14,0.17,0.14,0.00,0.14,0.03,0.06,anger


In [268]:
# Example
#emotags_df[(emotags_df["name"] == 'confused face')]

In [269]:
# Add the dominant emotion to the user - artwork - emoji data
emojis_emotion_df = emojis_df2.merge(emotags_df[['emoji','dominantEmotion']])
emojis_emotion_df

Unnamed: 0,user,artwork,q4,emoji,dominantEmotion
0,5,Aracne,😍 feelings of love or infatuation,😍,joy
1,6,La Sirena (Sirena) (Abisso verde),😍 feelings of love or infatuation,😍,joy
2,6,Estate (L'amaca),😍 feelings of love or infatuation,😍,joy
3,8,La Sirena (Sirena) (Abisso verde),😍 feelings of love or infatuation,😍,joy
4,9,Daphne (Daphne a Pavarolo),😍 feelings of love or infatuation,😍,joy
...,...,...,...,...,...
472,102,Daphne (Daphne a Pavarolo),😊,😊,joy
473,80,Asfissia! (parte sinistra),😏,😏,anticipation
474,104,Aracne,😬,😬,anticipation
475,112,La Sirena (Sirena) (Abisso verde),"😍 sentimenti di amore o infatuazione, 🙂 legger...",✨,joy


In [270]:
# user x artwork (values: list of dominant emotions based on the emojis)
#user_artwork_emotions_df = pd.pivot_table(user_artwork_emojis_df, values='dominantEmotion', index='user', columns='artwork', fill_value=np.NaN, aggfunc=np.sum)
#user_artwork_emotions_df = pd.pivot_table(user_artwork_emojis_df, values='dominantEmotion', index='user', columns='artwork', fill_value=np.NaN, aggfunc=pd.Series.tolist)
user_artwork_emotions_df = pd.pivot_table(emojis_emotion_df, values='dominantEmotion', index='user', columns='artwork', fill_value=np.NaN, aggfunc=lambda x: ', '.join(x))
user_artwork_emotions_df


artwork,Aprile,Aracne,Asfissia! (parte sinistra),Autoritratto in forma di gufo,Daphne (Daphne a Pavarolo),Estate (L'amaca),Gotine rosse,La Sirena (Sirena) (Abisso verde),La ragazza rossa (Testa di donna dai capelli rossi),"Lo specchio della vita (E ciò che l'una fa, e le altre fanno)",Marina,Mercato Vecchio di Firenze
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
5,sadness,joy,anticipation,,,,,anticipation,,,,
6,sadness,fear,anticipation,fear,,joy,disgust,joy,,disgust,,
7,,,sadness,,joy,,,,disgust,,,
8,disgust,,fear,,disgust,,,joy,anticipation,,joy,joy
9,sadness,fear,anticipation,"anticipation, fear",joy,,anger,fear,fear,,,anticipation
...,...,...,...,...,...,...,...,...,...,...,...,...
121,,anticipation,,,"anticipation, disgust",,,anticipation,,,,"sadness, joy"
122,,joy,"anticipation, sadness",sadness,,joy,,joy,joy,,,
123,sadness,,sadness,fear,sadness,joy,anger,,joy,joy,joy,
124,,,,,,,,fear,fear,joy,,


In [271]:
#--------------------------------------------------------------------------------------------------------------------------
#    Artwork Emotion Similarity (calculate distance matrix)
#--------------------------------------------------------------------------------------------------------------------------

In [272]:
# Calculate distances
# I dont think cosine similarity is the one we should go for since we are using emotion similarity
#distances = cosine_similarity(data)
# Thus, we directly use ArtworkEmotionSimilarity to get the distances
similarityMeasure = ArtworkEmotionSimilarity(user_artwork_emotions_df)
similarityDistanceMatrix = similarityMeasure.matrix_distance()
similarityDistanceMatrix

array([[0.        , 0.1875    , 0.75      , ..., 0.375     , 0.75      ,
        0.16666667],
       [0.1875    , 0.        , 0.75      , ..., 0.29166667, 0.625     ,
        0.25      ],
       [0.75      , 0.75      , 0.        , ..., 0.58333333, 0.75      ,
        0.75      ],
       ...,
       [0.375     , 0.29166667, 0.58333333, ..., 0.        , 0.25      ,
        0.75      ],
       [0.75      , 0.625     , 0.75      , ..., 0.25      , 0.        ,
        0.5       ],
       [0.16666667, 0.25      , 0.75      , ..., 0.75      , 0.5       ,
        0.        ]])

In [273]:
# Similarity Matrix
similarityMatrix = similarityMeasure.matrix_similarity()
similarityMatrix

array([[1.        , 0.8125    , 0.25      , ..., 0.625     , 0.25      ,
        0.83333333],
       [0.8125    , 1.        , 0.25      , ..., 0.70833333, 0.375     ,
        0.75      ],
       [0.25      , 0.25      , 1.        , ..., 0.41666667, 0.25      ,
        0.25      ],
       ...,
       [0.625     , 0.70833333, 0.41666667, ..., 1.        , 0.75      ,
        0.25      ],
       [0.25      , 0.375     , 0.25      , ..., 0.75      , 1.        ,
        0.5       ],
       [0.83333333, 0.75      , 0.25      , ..., 0.25      , 0.5       ,
        1.        ]])

In [274]:
#--------------------------------------------------------------------------------------------------------------------------
#    Agglomerative Clustering
#--------------------------------------------------------------------------------------------------------------------------

In [275]:
# Calculate similarity based on that
community_detection = ExplainedCommunitiesDetection(user_artwork_emotions_df, AgglomerativeCommunityDetection, ArtworkEmotionSimilarity)

In [276]:
#n_communities, users_communities = community_detection.search_all_communities(percentage=0.78)
n_communities, users_communities = community_detection.search_all_communities(percentage=0.35)
users_communities

{5: 1,
 6: 1,
 7: 0,
 8: 0,
 9: 1,
 11: 4,
 12: 1,
 13: 0,
 14: 1,
 15: 1,
 16: 4,
 17: 0,
 18: 1,
 19: 1,
 20: 1,
 21: 1,
 23: 1,
 24: 1,
 25: 1,
 26: 1,
 27: 3,
 28: 0,
 29: 0,
 30: 1,
 31: 1,
 32: 0,
 34: 0,
 35: 0,
 36: 1,
 37: 0,
 39: 0,
 41: 1,
 44: 4,
 45: 0,
 54: 1,
 58: 0,
 61: 1,
 63: 1,
 64: 4,
 65: 0,
 66: 4,
 67: 4,
 68: 1,
 69: 1,
 70: 0,
 71: 0,
 72: 0,
 73: 1,
 74: 0,
 75: 0,
 76: 4,
 77: 0,
 78: 0,
 79: 1,
 80: 1,
 81: 1,
 82: 1,
 84: 0,
 85: 1,
 87: 0,
 88: 3,
 89: 1,
 90: 4,
 91: 2,
 92: 1,
 93: 1,
 94: 0,
 95: 3,
 96: 0,
 97: 0,
 98: 0,
 99: 0,
 101: 1,
 102: 0,
 103: 1,
 104: 1,
 105: 1,
 106: 1,
 107: 0,
 108: 4,
 109: 1,
 110: 1,
 111: 4,
 112: 1,
 113: 4,
 114: 0,
 115: 1,
 116: 0,
 117: 1,
 118: 1,
 119: 4,
 120: 0,
 121: 0,
 122: 1,
 123: 0,
 124: 1,
 126: 1}

In [277]:
community_artwork_emotions_df = user_artwork_emotions_df.copy()
community_artwork_emotions_df['community'] = users_communities.values()
community_artwork_emotions_df.reset_index(inplace=True)
community_artwork_emotions_df

artwork,user,Aprile,Aracne,Asfissia! (parte sinistra),Autoritratto in forma di gufo,Daphne (Daphne a Pavarolo),Estate (L'amaca),Gotine rosse,La Sirena (Sirena) (Abisso verde),La ragazza rossa (Testa di donna dai capelli rossi),"Lo specchio della vita (E ciò che l'una fa, e le altre fanno)",Marina,Mercato Vecchio di Firenze,community
0,5,sadness,joy,anticipation,,,,,anticipation,,,,,1
1,6,sadness,fear,anticipation,fear,,joy,disgust,joy,,disgust,,,1
2,7,,,sadness,,joy,,,,disgust,,,,0
3,8,disgust,,fear,,disgust,,,joy,anticipation,,joy,joy,0
4,9,sadness,fear,anticipation,"anticipation, fear",joy,,anger,fear,fear,,,anticipation,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
92,121,,anticipation,,,"anticipation, disgust",,,anticipation,,,,"sadness, joy",0
93,122,,joy,"anticipation, sadness",sadness,,joy,,joy,joy,,,,1
94,123,sadness,,sadness,fear,sadness,joy,anger,,joy,joy,joy,,0
95,124,,,,,,,,fear,fear,joy,,,1


In [278]:
# Preparamos los datos de usuarios y distancias
users = community_artwork_emotions_df[['user','community']].values
data = community_artwork_emotions_df.iloc[:,1:-1].values
data

array([['sadness', 'joy', 'anticipation', ..., nan, nan, nan],
       ['sadness', 'fear', 'anticipation', ..., 'disgust', nan, nan],
       [nan, nan, 'sadness', ..., nan, nan, nan],
       ...,
       ['sadness', nan, 'sadness', ..., 'joy', 'joy', nan],
       [nan, nan, nan, ..., 'joy', nan, nan],
       [nan, 'anticipation', 'anticipation, fear', ..., nan, nan, nan]],
      dtype=object)

In [279]:
# Gephi triggers error because user is an int and not a string
# https://stackoverflow.com/questions/17950374/converting-a-column-within-pandas-dataframe-from-int-to-string
users_df = pd.DataFrame(users, columns=['user','community'])
users_df
users_df.head(40)

Unnamed: 0,user,community
0,5,1
1,6,1
2,7,0
3,8,0
4,9,1
5,11,4
6,12,1
7,13,0
8,14,1
9,15,1


In [280]:
# Add information about users
users_csv_df = pd.read_csv('../../data/GAM/clean_data/users.csv',encoding = 'utf-8')
users_csv_df = pd.read_csv('../../data/GAM/clean_data/usersStandardGenre.csv',encoding = 'utf-8')

user_gender = users_df.apply(lambda row: users_csv_df[users_csv_df['id'] == row['user']].gender.values[0], axis=1)
user_gender
user_age = users_df.apply(lambda row: users_csv_df[users_csv_df['id'] == row['user']].age.values[0], axis=1)
user_age
user_language = users_df.apply(lambda row: users_csv_df[users_csv_df['id'] == row['user']].languages.values[0], axis=1)
user_language

0     EN
1     EN
2     EN
3     EN
4     EN
      ..
92    IT
93    IT
94    IT
95    IT
96    IT
Length: 97, dtype: object

In [281]:
# If a column contains NaN (float) the column int values will be changed to float too
users_df['gender'] = user_gender
users_df['age'] = user_age
users_df['language'] = user_language
users_df['age'] = users_df['age'].apply(lambda x: x if x != '?' else np.NaN)
users_df['age'] = users_df['age'].apply(lambda x: x if pd.isna(x) else int(float(x))    )
users_df

Unnamed: 0,user,community,gender,age,language
0,5,1,Male,37.0,EN
1,6,1,Female,41.0,EN
2,7,0,Female,32.0,EN
3,8,0,Female,,EN
4,9,1,Prefer not to say,35.0,EN
...,...,...,...,...,...
92,121,0,Female,32.0,IT
93,122,1,Female,42.0,IT
94,123,0,Female,24.0,IT
95,124,1,Female,51.0,IT


In [282]:
search_df = users_df[(users_df["user"] == '89')]
search_df

Unnamed: 0,user,community,gender,age,language


In [283]:
# Change age to a value in an interval (young: < 30, adult >= 30 <= 65, elderly > 65)
ageIntervals = pd.DataFrame(columns = ['From','To','Value'], data = [[0.0,30.0,'young'],[30.0,65.0,'adult'],[65.0,500.0,'elderly']])
ageIntervals

Unnamed: 0,From,To,Value
0,0.0,30.0,young
1,30.0,65.0,adult
2,65.0,500.0,elderly


In [284]:
# Change age to a value in an interval (young: < 30, adult >= 30 <= 65, elderly > 65)
#ageIntervals = pd.DataFrame(columns = ['From','To','Value'], data = [[0,30,'young'],[30,65,'adult'],[65,500,'elderly']])
#ageIntervals

In [285]:
intervals = ageIntervals.set_index(pd.IntervalIndex.from_arrays(ageIntervals['From'], 
                                                             ageIntervals['To']))['Value']
intervals

(0.0, 30.0]        young
(30.0, 65.0]       adult
(65.0, 500.0]    elderly
Name: Value, dtype: object

In [286]:
users_df['ageGroup'] = users_df['age'].map(intervals)
users_df

Unnamed: 0,user,community,gender,age,language,ageGroup
0,5,1,Male,37.0,EN,adult
1,6,1,Female,41.0,EN,adult
2,7,0,Female,32.0,EN,adult
3,8,0,Female,,EN,
4,9,1,Prefer not to say,35.0,EN,adult
...,...,...,...,...,...,...
92,121,0,Female,32.0,IT,adult
93,122,1,Female,42.0,IT,adult
94,123,0,Female,24.0,IT,young
95,124,1,Female,51.0,IT,adult


In [287]:
search_df = users_df[(users_df["ageGroup"] == 'young')]
search_df
print(len(search_df))

23


In [288]:
users_df['user'] = users_df['user'].apply(str)
users2 = users_df[['user','community','gender','ageGroup','language']].values
users2

array([['5', 1, 'Male', 'adult', 'EN'],
       ['6', 1, 'Female', 'adult', 'EN'],
       ['7', 0, 'Female', 'adult', 'EN'],
       ['8', 0, 'Female', nan, 'EN'],
       ['9', 1, 'Prefer not to say', 'adult', 'EN'],
       ['11', 4, 'Female', 'adult', 'EN'],
       ['12', 1, 'Female', 'adult', 'EN'],
       ['13', 0, 'Male', 'adult', 'EN'],
       ['14', 1, 'Male', 'young', 'EN'],
       ['15', 1, 'Male', 'young', 'EN'],
       ['16', 4, 'Male', 'elderly', 'EN'],
       ['17', 0, 'Male', 'adult', 'EN'],
       ['18', 1, 'Female', 'adult', 'EN'],
       ['19', 1, 'Female', 'adult', 'EN'],
       ['20', 1, 'Male', 'adult', 'EN'],
       ['21', 1, 'Male', 'adult', 'EN'],
       ['23', 1, 'Female', 'young', 'EN'],
       ['24', 1, 'Female', 'adult', 'EN'],
       ['25', 1, 'Agender', 'adult', 'EN'],
       ['26', 1, 'Female', 'adult', 'EN'],
       ['27', 3, 'Female', 'adult', 'EN'],
       ['28', 0, 'Female', 'adult', 'EN'],
       ['29', 0, 'Male', 'adult', 'EN'],
       ['30', 1, 'Female

In [289]:
# Add information about gender/age


In [290]:
# Just to check a community
community_data = community_detection.get_community(0, answer_binary=False,percentage=0.35)
#community_data

In [291]:
# Explaining the community
users_without_community = []
for c in range(n_communities):
    community_data = community_detection.get_community(c, answer_binary=False,percentage=0.35)

    if len(community_data['members']) > 1:

        print('---------------------')
        print('COMMUNITY -', community_data['name'])
        print('\t- N. Members:', len(community_data['members']))
        print('\t- Properties:')

        for k in community_data['properties'].keys():
            #print('\t\t-', k)
            print('\t\t-', k, community_data['properties'][k])
    else:
        users_without_community.extend(community_data['members'])

print('---------------------')
print('N. USERS WITHOUT COMMUNITY -', len(users_without_community))

---------------------
COMMUNITY - 0
	- N. Members: 35
	- Properties:
		- Asfissia! (parte sinistra) sadness
		- Marina joy
---------------------
COMMUNITY - 1
	- N. Members: 46
	- Properties:
		- Asfissia! (parte sinistra) anticipation
---------------------
COMMUNITY - 3
	- N. Members: 3
	- Properties:
		- Aprile sadness
		- Aracne sadness
		- La Sirena (Sirena) (Abisso verde) sadness
---------------------
COMMUNITY - 4
	- N. Members: 12
	- Properties:
		- Aracne sadness
		- La Sirena (Sirena) (Abisso verde) joy
		- Marina disgust
---------------------
N. USERS WITHOUT COMMUNITY - 1


In [292]:
#--------------------------------------------------------------------------------------------------------------------------
#    Explanations for presentation
#--------------------------------------------------------------------------------------------------------------------------

In [293]:
community_artwork_emotions_df

artwork,user,Aprile,Aracne,Asfissia! (parte sinistra),Autoritratto in forma di gufo,Daphne (Daphne a Pavarolo),Estate (L'amaca),Gotine rosse,La Sirena (Sirena) (Abisso verde),La ragazza rossa (Testa di donna dai capelli rossi),"Lo specchio della vita (E ciò che l'una fa, e le altre fanno)",Marina,Mercato Vecchio di Firenze,community
0,5,sadness,joy,anticipation,,,,,anticipation,,,,,1
1,6,sadness,fear,anticipation,fear,,joy,disgust,joy,,disgust,,,1
2,7,,,sadness,,joy,,,,disgust,,,,0
3,8,disgust,,fear,,disgust,,,joy,anticipation,,joy,joy,0
4,9,sadness,fear,anticipation,"anticipation, fear",joy,,anger,fear,fear,,,anticipation,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
92,121,,anticipation,,,"anticipation, disgust",,,anticipation,,,,"sadness, joy",0
93,122,,joy,"anticipation, sadness",sadness,,joy,,joy,joy,,,,1
94,123,sadness,,sadness,fear,sadness,joy,anger,,joy,joy,joy,,0
95,124,,,,,,,,fear,fear,joy,,,1


In [294]:
users_df

Unnamed: 0,user,community,gender,age,language,ageGroup
0,5,1,Male,37.0,EN,adult
1,6,1,Female,41.0,EN,adult
2,7,0,Female,32.0,EN,adult
3,8,0,Female,,EN,
4,9,1,Prefer not to say,35.0,EN,adult
...,...,...,...,...,...,...
92,121,0,Female,32.0,IT,adult
93,122,1,Female,42.0,IT,adult
94,123,0,Female,24.0,IT,young
95,124,1,Female,51.0,IT,adult


In [295]:
combined_df = users_df.copy()
combined_df


Unnamed: 0,user,community,gender,age,language,ageGroup
0,5,1,Male,37.0,EN,adult
1,6,1,Female,41.0,EN,adult
2,7,0,Female,32.0,EN,adult
3,8,0,Female,,EN,
4,9,1,Prefer not to say,35.0,EN,adult
...,...,...,...,...,...,...
92,121,0,Female,32.0,IT,adult
93,122,1,Female,42.0,IT,adult
94,123,0,Female,24.0,IT,young
95,124,1,Female,51.0,IT,adult


In [296]:
#combined_df = pd.concat([combined_df,community_artwork_emotions_df], axis=1, join='inner')
combined_df2 = pd.concat([combined_df,community_artwork_emotions_df], axis=1, join='inner')
combined_df2.T.drop_duplicates().T
combined_df2

Unnamed: 0,user,community,gender,age,language,ageGroup,user.1,Aprile,Aracne,Asfissia! (parte sinistra),Autoritratto in forma di gufo,Daphne (Daphne a Pavarolo),Estate (L'amaca),Gotine rosse,La Sirena (Sirena) (Abisso verde),La ragazza rossa (Testa di donna dai capelli rossi),"Lo specchio della vita (E ciò che l'una fa, e le altre fanno)",Marina,Mercato Vecchio di Firenze,community.1
0,5,1,Male,37.0,EN,adult,5,sadness,joy,anticipation,,,,,anticipation,,,,,1
1,6,1,Female,41.0,EN,adult,6,sadness,fear,anticipation,fear,,joy,disgust,joy,,disgust,,,1
2,7,0,Female,32.0,EN,adult,7,,,sadness,,joy,,,,disgust,,,,0
3,8,0,Female,,EN,,8,disgust,,fear,,disgust,,,joy,anticipation,,joy,joy,0
4,9,1,Prefer not to say,35.0,EN,adult,9,sadness,fear,anticipation,"anticipation, fear",joy,,anger,fear,fear,,,anticipation,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
92,121,0,Female,32.0,IT,adult,121,,anticipation,,,"anticipation, disgust",,,anticipation,,,,"sadness, joy",0
93,122,1,Female,42.0,IT,adult,122,,joy,"anticipation, sadness",sadness,,joy,,joy,joy,,,,1
94,123,0,Female,24.0,IT,young,123,sadness,,sadness,fear,sadness,joy,anger,,joy,joy,joy,,0
95,124,1,Female,51.0,IT,adult,124,,,,,,,,fear,fear,joy,,,1


In [297]:
# Search for emotions in La Sirena
artwork = 'La Sirena (Sirena) (Abisso verde)'
artwork = 'Marina'
groups = ['user','community','ageGroup','language','gender']
groups.append(artwork)
search_df = combined_df2[groups]
search_df


Unnamed: 0,user,user.1,community,community.1,ageGroup,language,gender,Marina
0,5,5,1,1,adult,EN,Male,
1,6,6,1,1,adult,EN,Female,
2,7,7,0,0,adult,EN,Female,
3,8,8,0,0,,EN,Female,joy
4,9,9,1,1,adult,EN,Prefer not to say,
...,...,...,...,...,...,...,...,...
92,121,121,0,0,adult,IT,Female,
93,122,122,1,1,adult,IT,Female,
94,123,123,0,0,young,IT,Female,joy
95,124,124,1,1,adult,IT,Female,


In [298]:
search_df2 = search_df[(search_df[artwork] == 'disgust') & (search_df['gender'] == 'Female')]
search_df2
#len(search_df2)

Unnamed: 0,user,user.1,community,community.1,ageGroup,language,gender,Marina
50,76,76,4,4,adult,HE,Female,disgust
62,90,90,4,4,young,HE,Female,disgust


In [299]:
search_df3 = search_df[(search_df[artwork] == 'joy')]
search_df3

Unnamed: 0,user,user.1,community,community.1,ageGroup,language,gender,Marina
3,8,8,0,0,,EN,Female,joy
6,12,12,1,1,adult,EN,Female,joy
7,13,13,0,0,adult,EN,Male,joy
13,19,19,1,1,adult,EN,Female,joy
19,26,26,1,1,adult,EN,Female,joy
20,27,27,3,3,adult,EN,Female,joy
24,31,31,1,1,,EN,Female,joy
26,34,34,0,0,adult,EN,Female,joy
27,35,35,0,0,adult,EN,Female,joy
28,36,36,1,1,adult,EN,Female,joy


In [300]:
search_df4 = search_df[(search_df[artwork] == 'disgust')]
search_df4

Unnamed: 0,user,user.1,community,community.1,ageGroup,language,gender,Marina
22,29,29,0,0,adult,EN,Male,disgust
41,67,67,4,4,adult,HE,Male,disgust
45,71,71,0,0,adult,HE,Male,disgust
50,76,76,4,4,adult,HE,Female,disgust
62,90,90,4,4,young,HE,Female,disgust
65,93,93,1,1,adult,IT,Male,disgust
82,111,111,4,4,adult,IT,Unknown,disgust
90,119,119,4,4,adult,IT,Male,disgust


In [301]:
search_df5 = search_df[(search_df[artwork] == 'sadness')]
search_df5

Unnamed: 0,user,user.1,community,community.1,ageGroup,language,gender,Marina
10,16,16,4,4,elderly,EN,Male,sadness
21,28,28,0,0,adult,EN,Female,sadness


In [302]:
search_df6 = search_df[(search_df['ageGroup'] == 'elderly')]
search_df6

Unnamed: 0,user,user.1,community,community.1,ageGroup,language,gender,Marina
10,16,16,4,4,elderly,EN,Male,sadness
42,68,68,1,1,elderly,HE,Male,
49,75,75,0,0,elderly,HE,Male,
73,102,102,0,0,elderly,IT,Female,joy
89,118,118,1,1,elderly,IT,Female,


In [303]:
emotions = search_df[artwork].tolist()
emotions2 = set(emotions)
emotions2

{'anticipation',
 'anticipation, disgust',
 'anticipation, sadness',
 'disgust',
 'joy',
 'joy, joy',
 nan,
 'sadness'}

In [304]:
#combined_df[[community_artwork_emotions_df.columns()]] = community_artwork_emotions_df[[community_artwork_emotions_df.columns()]]
#combined_df

In [305]:
#--------------------------------------------------------------------------------------------------------------------------
#    JSON
#--------------------------------------------------------------------------------------------------------------------------

In [306]:
import json
import math

In [307]:
users_json_df = users_df[['user','community','ageGroup','language']]
users_json_df

Unnamed: 0,user,community,ageGroup,language
0,5,1,adult,EN
1,6,1,adult,EN
2,7,0,adult,EN
3,8,0,,EN
4,9,1,adult,EN
...,...,...,...,...
92,121,0,adult,IT
93,122,1,adult,IT
94,123,0,young,IT
95,124,1,adult,IT


In [308]:
users_json_df2 = users_json_df.copy()
users_json_df2['ageGroup'] = users_json_df2['ageGroup'].fillna('')
users_json_df2['language'] = users_json_df2['language'].fillna('')
# change column name Category to Pet
users_json_df2 = users_json_df2.rename(columns={"user":"id"})
users_json_df2 = users_json_df2.rename(columns={"community":"group"})
users_json_df2['label'] = users_json_df2['id']
users_json_df2

Unnamed: 0,id,group,ageGroup,language,label
0,5,1,adult,EN,5
1,6,1,adult,EN,6
2,7,0,adult,EN,7
3,8,0,,EN,8
4,9,1,adult,EN,9
...,...,...,...,...,...
92,121,0,adult,IT,121
93,122,1,adult,IT,122
94,123,0,young,IT,123
95,124,1,adult,IT,124


In [309]:
search_df = users_df[(users_json_df2["ageGroup"] == 'elderly')]
search_df
print(len(search_df))

5


In [310]:
# Combine ageGroup and language into explicit_community column
users_json_df3 = users_json_df2.copy()
columns = ['ageGroup','language']
users_json_df3['explicit_community'] = users_json_df3[columns].to_dict(orient='records')
users_json_df3

Unnamed: 0,id,group,ageGroup,language,label,explicit_community
0,5,1,adult,EN,5,"{'ageGroup': 'adult', 'language': 'EN'}"
1,6,1,adult,EN,6,"{'ageGroup': 'adult', 'language': 'EN'}"
2,7,0,adult,EN,7,"{'ageGroup': 'adult', 'language': 'EN'}"
3,8,0,,EN,8,"{'ageGroup': '', 'language': 'EN'}"
4,9,1,adult,EN,9,"{'ageGroup': 'adult', 'language': 'EN'}"
...,...,...,...,...,...,...
92,121,0,adult,IT,121,"{'ageGroup': 'adult', 'language': 'IT'}"
93,122,1,adult,IT,122,"{'ageGroup': 'adult', 'language': 'IT'}"
94,123,0,young,IT,123,"{'ageGroup': 'young', 'language': 'IT'}"
95,124,1,adult,IT,124,"{'ageGroup': 'adult', 'language': 'IT'}"


In [311]:
# https://stackoverflow.com/questions/65357356/converting-pandas-dataframe-to-json
# https://stackoverflow.com/questions/59741934/python-pandas-merge-multiple-columns-into-a-dictionary-column
#users_json_df6 = users_json_df5.groupby(['id','label','group']).apply(lambda x: x.to_dict(orient='r'))
# users_json_df7 = users_json_df5.groupby(['id','label','group'])[['ageGroup','language']].apply(lambda x: x.to_dict(orient='records')).reset_index(name='explicit_community').to_dict(orient='records')
#users_json_df8 = users_json_df5.groupby(['id','label','group'])[['ageGroup','language']].apply(lambda x: x.to_dict(orient='records')).reset_index(name='explicit_community').to_dict(orient='records')


In [312]:
# Export community information to JSON format
communityJson = {}

In [313]:
# Community Data
communityJson['communities'] = []

for c in range(n_communities):
    community_data = community_detection.get_community(c, answer_binary=False,percentage=0.35)

    communityDictionary = {}
    communityDictionary['community-type'] = 'implicit'
    communityDictionary['name'] = 'Community ' + str(c)
    
    if len(community_data['members']) > 1:
        communityPropertiesList = []
        for k in community_data['properties'].keys():
            #print('\t\t-', k)
            #communityProperties += '\t\t-' + ' ' + str(k) + ' ' + community_data['properties'][k] + '\n'
            communityPropertiesList.append("'" + str(k) + "'"  + ': ' + "'" + community_data['properties'][k] + "'")
        communityProperties = 'Similar dominant emotions while interacting with the following artworks: {'
        communityProperties += '; '.join(communityPropertiesList)
        communityProperties += '}'
        
    else:
        communityProperties = 'Users without community'
        
    communityDictionary['explanation'] = communityProperties
    #communityDictionary[name]['users'] = 
    
    
    communityDictionary['users'] = []
    for user in community_data['members']:
        communityDictionary['users'].append(str(user))
    
    communityJson['communities'].append(communityDictionary)


In [314]:
# User Data
communityJson["users"] = []
communityJson['users'] = users_json_df3[['id','label','group','explicit_community']].to_dict('records')
communityJson

{'communities': [{'community-type': 'implicit',
   'name': 'Community 0',
   'explanation': "Similar dominant emotions while interacting with the following artworks: {'Asfissia! (parte sinistra)': 'sadness'; 'Marina': 'joy'}",
   'users': ['7',
    '8',
    '13',
    '17',
    '28',
    '29',
    '32',
    '34',
    '35',
    '37',
    '39',
    '45',
    '58',
    '65',
    '70',
    '71',
    '72',
    '74',
    '75',
    '77',
    '78',
    '84',
    '87',
    '94',
    '96',
    '97',
    '98',
    '99',
    '102',
    '107',
    '114',
    '116',
    '120',
    '121',
    '123']},
  {'community-type': 'implicit',
   'name': 'Community 1',
   'explanation': "Similar dominant emotions while interacting with the following artworks: {'Asfissia! (parte sinistra)': 'anticipation'}",
   'users': ['5',
    '6',
    '9',
    '12',
    '14',
    '15',
    '18',
    '19',
    '20',
    '21',
    '23',
    '24',
    '25',
    '26',
    '30',
    '31',
    '36',
    '41',
    '54',
    '61',
 

In [315]:
# Similarity Data
communityJson['similarity'] = []    
# users
for i in range(len(similarityDistanceMatrix)):
    for j in range(i+1,len(similarityDistanceMatrix[i])):
        dicti = {}
        dicti['u1'] = str(user_artwork_emotions_df.index[i])
        dicti['u2'] = str(user_artwork_emotions_df.index[j])
        #dicti['value'] = similarityMatrix[i][j]
        dicti['value'] = math.floor( (similarityMatrix[i][j] * 100 ) ) / 100
        communityJson['similarity'].append(dicti)
    
    

    
    





In [316]:
with open("agglomerativeClustering.json", "w") as outfile:
    json.dump(communityJson, outfile, indent=4)

In [317]:
"""
# Export community information to JSON format
communityJson = {}
communityJson['communities'] = []

for c in range(n_communities):
    community_data = community_detection.get_community(c, answer_binary=False,percentage=0.35)
    communityDictionary = {}
    name = community_data['name']
    communityDictionary[name] = {}
    communityDictionary[name]['community-type'] = 'implicit'
    communityDictionary[name]['name'] = 'Emotions felt while interacting with same artworks'
    
    if len(community_data['members']) > 1:
        communityProperties = 'People feeling the following emotions when they interacted with the artworks:\n'
        for k in community_data['properties'].keys():
            #print('\t\t-', k)
            communityProperties += '\t\t-' + ' ' + str(k) + ' ' + community_data['properties'][k] + '\n'
        communityDictionary[name]['explanation'] = communityProperties
    
    #communityDictionary[name]['users'] = 
    
    
    communityDictionary[name]['users'] = []
    for user in community_data['members']:
        communityDictionary['users'].append(str(user))
    
    communityJson['communities'].append(communityDictionary)


    
communityJson['similarity'] = []    
# users
for i in range(len(similarityDistanceMatrix)):
    for j in range(len(similarityDistanceMatrix[i])):
        dicti = {}
        dicti['u1'] = str(user_artwork_emotions_df.index[i])
        dicti['u2'] = str(user_artwork_emotions_df.index[j])
        dicti['value'] = similarityDistanceMatrix[i][j]
        communityJson['similarity'].append(dicti)
    
    

    
    
with open("sample.json", "w") as outfile:
    json.dump(communityJson, outfile, indent=4)
"""



'\n# Export community information to JSON format\ncommunityJson = {}\ncommunityJson[\'communities\'] = []\n\nfor c in range(n_communities):\n    community_data = community_detection.get_community(c, answer_binary=False,percentage=0.35)\n    communityDictionary = {}\n    name = community_data[\'name\']\n    communityDictionary[name] = {}\n    communityDictionary[name][\'community-type\'] = \'implicit\'\n    communityDictionary[name][\'name\'] = \'Emotions felt while interacting with same artworks\'\n    \n    if len(community_data[\'members\']) > 1:\n        communityProperties = \'People feeling the following emotions when they interacted with the artworks:\n\'\n        for k in community_data[\'properties\'].keys():\n            #print(\'\t\t-\', k)\n            communityProperties += \'\t\t-\' + \' \' + str(k) + \' \' + community_data[\'properties\'][k] + \'\n\'\n        communityDictionary[name][\'explanation\'] = communityProperties\n    \n    #communityDictionary[name][\'users\'] 

In [318]:
similarityDistanceMatrix

array([[0.        , 0.1875    , 0.75      , ..., 0.375     , 0.75      ,
        0.16666667],
       [0.1875    , 0.        , 0.75      , ..., 0.29166667, 0.625     ,
        0.25      ],
       [0.75      , 0.75      , 0.        , ..., 0.58333333, 0.75      ,
        0.75      ],
       ...,
       [0.375     , 0.29166667, 0.58333333, ..., 0.        , 0.25      ,
        0.75      ],
       [0.75      , 0.625     , 0.75      , ..., 0.25      , 0.        ,
        0.5       ],
       [0.16666667, 0.25      , 0.75      , ..., 0.75      , 0.5       ,
        0.        ]])

In [319]:
    
"""
{

    "community-type": "explicit",
    "name": "elderly",
    "id": "d290f1ee-6c54-4b01-90e6-d701748f0851",
    "explanation": "People whose age is above 65",
    "users": 

    [
        "23",
        "24"
    ]

}
"""

'\n{\n\n    "community-type": "explicit",\n    "name": "elderly",\n    "id": "d290f1ee-6c54-4b01-90e6-d701748f0851",\n    "explanation": "People whose age is above 65",\n    "users": \n\n    [\n        "23",\n        "24"\n    ]\n\n}\n'

In [320]:
stop

NameError: name 'stop' is not defined

In [None]:
#--------------------------------------------------------------------------------------------------------------------------
#    Agglomerative Clustering (cosine similarity)
#--------------------------------------------------------------------------------------------------------------------------

In [None]:
# Notes:
# It is not meaningful because user with emotion 1 in X artworks has perfect similarity with another with emotion 5 in same X artworks
# https://datascience.stackexchange.com/questions/51020/cosine-similarity-with-arrays-contaning-nan

In [None]:
user_artwork_emotions_df

In [None]:
# Change pivot table to index (emotion)
user_artwork_emotions_cosine_df = user_artwork_emotions_df.copy()
user_artwork_emotions_cosine_df

In [None]:
user_artwork_emotions_cosine_df2 = user_artwork_emotions_cosine_df.iloc[:,0:].apply(lambda x: x.str.split(pat=', ',expand=False).str[0], axis=1)
user_artwork_emotions_cosine_df2

In [None]:
PLUTCHIK_EMOTIONS = ['anger', 'anticipation', 'joy', 'trust', 'fear', 'surprise', 'sadness', 'disgust']
counter = 0
def plutchikEmotionIndex(emotion):
    global counter
    #print("\ncounter: " + str(counter))
    counter += 1
    if pd.isna(emotion):
        #print("nan")
        return emotion
    elif(PLUTCHIK_EMOTIONS.index(emotion)):
        #print(emotion)
        # print('emotion: ' + str(emotion))
        #print("dragon")
        result = PLUTCHIK_EMOTIONS.index(emotion)
        #print("hey")
        #print("phoenix")
        # print('result: ' + str(result))
        return result

In [None]:
user_artwork_emotions_cosine_df3 = user_artwork_emotions_cosine_df2.iloc[0:,0:].apply(np.vectorize(plutchikEmotionIndex,otypes=[float]))
user_artwork_emotions_cosine_df3

In [None]:
user_artwork_emotions_cosine_df4 = user_artwork_emotions_cosine_df3.copy()
user_artwork_emotions_cosine_df4.fillna(999.0,inplace=True)
user_artwork_emotions_cosine_df4

In [None]:
agglomerativeCosine_community_detection = ExplainedCommunitiesDetection(user_artwork_emotions_cosine_df4, AgglomerativeCommunityDetection, 'cosine')

In [None]:
agglomerativeCosine_n_communities, agglomerativeCosine_users_communities = agglomerativeCosine_community_detection.search_all_communities(percentage=0.78)
agglomerativeCosine_users_communities

In [None]:
agglomerativeCosine_community_artwork_emotions_df = user_artwork_emotions_df.copy()
agglomerativeCosine_community_artwork_emotions_df['community'] = agglomerativeCosine_users_communities.values()
agglomerativeCosine_community_artwork_emotions_df.reset_index(inplace=True)
agglomerativeCosine_community_artwork_emotions_df

In [None]:
# Preparamos los datos de usuarios y distancias
agglomerativeCosine_users = agglomerativeCosine_community_artwork_emotions_df[['user','community']].values
data = agglomerativeCosine_community_artwork_emotions_df.iloc[:,1:-1].values
data

In [None]:
# Gephi triggers error because user is an int and not a string
# https://stackoverflow.com/questions/17950374/converting-a-column-within-pandas-dataframe-from-int-to-string
agglomerativeCosine_users_df = pd.DataFrame(agglomerativeCosine_users, columns=['user','community'])
agglomerativeCosine_users_df['user'] = agglomerativeCosine_users_df['user'].apply(str)
agglomerativeCosine_users_df
agglomerativeCosine_users2 = agglomerativeCosine_users_df[['user','community']].values
#users2

In [None]:
# Explaining the community
agglomerativeCosine_users_without_community = []
for c in range(n_communities):
    agglomerativeCosine_community_data = agglomerativeCosine_community_detection.get_community(c, answer_binary=False,percentage=0.35)

    if len(agglomerativeCosine_community_data['members']) > 1:

        print('---------------------')
        print('COMMUNITY -', agglomerativeCosine_community_data['name'])
        print('\t- N. Members:', len(agglomerativeCosine_community_data['members']))
        print('\t- Properties:')

        for k in agglomerativeCosine_community_data['properties'].keys():
            #print('\t\t-', k)
            print('\t\t-', k, agglomerativeCosine_community_data['properties'][k])
    else:
        agglomerativeCosine_users_without_community.extend(agglomerativeCosine_community_data['members'])

print('---------------------')
print('N. USERS WITHOUT COMMUNITY -', len(agglomerativeCosine_users_without_community))

In [None]:
#--------------------------------------------------------------------------------------------------------------------------
#    Kmedoids Clustering (notebook implemenation)
#--------------------------------------------------------------------------------------------------------------------------

In [None]:
from pyclustering.cluster.kmedoids import kmedoids
from pyclustering.cluster.center_initializer import kmeans_plusplus_initializer

In [None]:
# Initialize initial medoids using K-Means++ algorithm
#initial_medoids = kmeans_plusplus_initializer(similarityData, 2).initialize(return_index=True)
initial_medoids = [1,3,5,7,9]
initial_medoids

In [None]:
# Initialize initial medoids using K-Means++ algorithm
#initial_medoids = kmeans_plusplus_initializer(artworkEmotionDistanceMatrix, 5).initialize(return_index=True)
initial_medoids

In [None]:
# create K-Medoids algorithm for processing distance matrix instead of points
kmedoids_instance = kmedoids(similarityDistanceMatrix, initial_medoids, data_type='distance_matrix')
 
# run cluster analysis and obtain results
kmedoids_instance.process()
 
clusters = kmedoids_instance.get_clusters()
medoids = kmedoids_instance.get_medoids()

clusters

In [None]:
#--------------------------------------------------------------------------------------------------------------------------
#    Kmedoids Clustering (using KmedoidsCommunityDetection)
#--------------------------------------------------------------------------------------------------------------------------

In [None]:
# KMedoids using the new class
kmedoids_community_detection = ExplainedCommunitiesDetection(user_artwork_emotions_df, KmedoidsCommunityDetection, similarityDistanceMatrix)

In [None]:
user_artwork_emotions_df.head(20)

In [None]:
kmedoids_n_communities, kmedoids_users_communities = kmedoids_community_detection.search_all_communities(percentage=0.35,finishSearchSet = True)
kmedoids_users_communities

In [None]:
kmedoids_community_artwork_emotions_df = user_artwork_emotions_df.copy()
kmedoids_community_artwork_emotions_df['community'] = kmedoids_users_communities.values()
kmedoids_community_artwork_emotions_df.reset_index(inplace=True)
kmedoids_community_artwork_emotions_df

In [None]:
# Preparamos los datos de usuarios y distancias
kmedoids_users = kmedoids_community_artwork_emotions_df[['user','community']].values
data = kmedoids_community_artwork_emotions_df.iloc[:,1:-1].values
data

In [None]:
# Gephi triggers error because user is an int and not a string
# https://stackoverflow.com/questions/17950374/converting-a-column-within-pandas-dataframe-from-int-to-string
kmedoids_users_df = pd.DataFrame(kmedoids_users, columns=['user','community'])
kmedoids_users_df['user'] = kmedoids_users_df['user'].apply(str)
kmedoids_users_df
kmedoids_users2 = kmedoids_users_df[['user','community']].values
#users2

In [None]:
# Explaining the community
kmedoids_users_without_community = []
for c in range(n_communities):
    kmedoids_community_data = kmedoids_community_detection.get_community(c, answer_binary=False,percentage=0.35)

    if len(kmedoids_community_data['members']) > 1:

        print('---------------------')
        print('COMMUNITY -', kmedoids_community_data['name'])
        print('\t- N. Members:', len(kmedoids_community_data['members']))
        print('\t- Properties:')

        for k in kmedoids_community_data['properties'].keys():
            #print('\t\t-', k)
            print('\t\t-', k, kmedoids_community_data['properties'][k])
    else:
        kmedoids_users_without_community.extend(kmedoids_community_data['members'])

print('---------------------')
print('N. USERS WITHOUT COMMUNITY -', len(kmedoids_users_without_community))

In [None]:
#--------------------------------------------------------------------------------------------------------------------------
#    DBScan Clustering (using KmedoidsCommunityDetection)
#--------------------------------------------------------------------------------------------------------------------------

In [None]:
from community_module.community_detection.dbscanCommunityDetection import DbscanCommunityDetection

In [None]:
similarityDistanceMatrix

In [None]:
user_artwork_emotions_df.head(20)

In [None]:
# KMedoids using the new class
dbscan_community_detection = ExplainedCommunitiesDetection(user_artwork_emotions_df, DbscanCommunityDetection, similarityDistanceMatrix)

In [None]:
dbscan_n_communities, dbscan_users_communities = dbscan_community_detection.search_all_communities(percentage=0.35,finishSearchSet = True)
dbscan_users_communities

In [None]:
#--------------------------------------------------------------------------------------------------------------------------
#    DBScan Clustering (using KmedoidsCommunityDetection)
#--------------------------------------------------------------------------------------------------------------------------

In [None]:
df = pd.read_csv("https://reneshbedre.github.io/assets/posts/tsne/tsne_scores.csv")
df.head(2)

In [None]:
from sklearn.cluster import DBSCAN
import sklearn
clusters = DBSCAN(eps=2.5, min_samples=4).fit(df)
# get cluster labels
clusters.labels_
clusters.labels_[50]
len(clusters.labels_)

In [None]:
distanceMatrix = sklearn.metrics.pairwise.euclidean_distances(df,df)
distanceMatrix

In [None]:
clusters = DBSCAN(eps=2.5, min_samples=4,metric='precomputed').fit(distanceMatrix)
clusters.labels_
clusters.labels_[50]
len(clusters.labels_)

In [None]:
similarityDistanceMatrix
len(similarityDistanceMatrix)

In [None]:
clusters = DBSCAN(eps=0.5, min_samples=30,metric='precomputed').fit(similarityDistanceMatrix)
clusters.labels_

In [None]:
import networkx as nx
G = G=nx.from_numpy_matrix(similarityDistanceMatrix)
nx.draw(G)

In [None]:
similarityDistanceMatrix

In [None]:
similarityDistanceMatrix2 = np.multiply(similarityDistanceMatrix,10)
similarityDistanceMatrix2

In [None]:
import networkx as nx
G = G=nx.from_numpy_matrix(similarityDistanceMatrix2)
nx.draw(G)

In [None]:
clusters = DBSCAN(eps=0.00001, min_samples=10,metric='precomputed').fit(similarityDistanceMatrix2)
clusters.labels_

In [None]:
#--------------------------------------------------------------------------------------------------------------------------
#    Gephi Visualization
#--------------------------------------------------------------------------------------------------------------------------

In [None]:
#stop

In [None]:
users2

In [None]:
gv = GephiVisualization(workspace='gamemo')
gv.load_community(users2, similarityDistanceMatrix, users_properties=['community','gender','ageGroup','language'])