In [1]:
from db.client import PGClient
from collections import ChainMap
from datetime import datetime
import re
import emoji
import pandas as pd
import numpy as np
import json
import os

In [8]:
# Shared PGClient instance; get_messages() below sends its SQL through this
# module-level `client` via client.query(...).
client = PGClient()

In [16]:
# Scratch check: merge two dicts and list the merged (key, value) pairs.
a = dict(a=1, b=2)
b = dict(c=3, d=4)
c = list({**a, **b}.items())
c

[('a', 1), ('d', 4), ('b', 2), ('c', 3)]

In [9]:
# Build one regex that matches any emoji key exposed by `emoji.UNICODE_EMOJI`.
#
# * Keys have their internal whitespace removed, are de-duplicated, and are
#   materialised as a real list (the previous `map` object could only be
#   consumed once, despite the `_list` name).
# * Empty strings are dropped: an empty alternative always matches, so every
#   branch listed after it would never be tried.
# * Alternatives are ordered longest-first. `re` tries `|` branches left to
#   right and accepts the first one that matches, so a multi-codepoint
#   sequence has to come before any shorter emoji it starts with; otherwise
#   only the prefix is removed and the remaining codepoints stay in the text.
# NOTE(review): `UNICODE_EMOJI` looks specific to older releases of the
# `emoji` package - confirm the pinned version still exposes it.
uni_emojis_list = sorted(
    {''.join(x.split()) for x in emoji.UNICODE_EMOJI.keys()} - {''},
    key=lambda s: (-len(s), s),
)
uni_emojis_pt = re.compile('|'.join(re.escape(p) for p in uni_emojis_list))

def clean(c):
    """Strip URLs, mentions, emojis, double quotes and newlines from a text.

    The input is converted with ``str()`` and then, in this order:

    1. ``http://`` / ``https://`` URLs are removed,
    2. ``<@123>`` / ``<@!123>`` mention tags are removed,
    3. newlines are replaced by a single space,
    4. every match of the module-level ``uni_emojis_pt`` pattern is removed,
    5. double-quote characters are removed,
    6. ``<:name:123>`` custom emoji tags are removed.

    Removed tokens are replaced by nothing, so the spaces that surrounded
    them are kept (``'a <@1> b'`` becomes ``'a  b'``).

    Args:
        c: Any object; its ``str()`` representation is cleaned.

    Returns:
        The cleaned text with leading and trailing whitespace stripped.
    """
    # Raw strings: sequences such as "\(" and "\w" are invalid escapes in a
    # normal string literal and trigger a warning on recent Python versions.
    url_pt = r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
    custom_emoji_pt = r'<:\w+:[0-9]+>'
    tag_pt = r'<@!?[0-9]+>'

    c = str(c)
    c = re.sub(url_pt, '', c)
    c = re.sub(tag_pt, '', c)
    c = c.replace('\n', ' ')
    c = uni_emojis_pt.sub('', c)
    c = c.replace('"', '')

    # Remove each custom emoji tag found in the text. The tag is removed as a
    # literal substring (str.replace) rather than being re-used as a regex.
    for tag in re.findall(custom_emoji_pt, c):
        c = c.replace(tag, '')

    return c.strip()

In [10]:
def get_messages(first_id=None, last_id=None, by_author=False):
    """Fetch messages from the ``messages`` table, ordered by id.

    The query is run through the module-level ``client`` and always excludes
    three hard-coded author ids. Each row is expected to carry one JSON
    object (built with ``json_build_object``) in its first column, with the
    keys ``id``, ``posted_at``, ``content``, ``author_name`` and
    ``author_id``.

    Args:
        first_id: If truthy, only messages with ``id >= first_id`` are fetched.
        last_id: If truthy, only messages with ``id <= last_id`` are fetched.
        by_author: When False (default) return a flat list of message dicts.
            When True return a dict mapping ``str(author_id)`` to the list
            of that author's messages, with ``author_id`` and
            ``author_name`` removed from each message dict.

    Returns:
        ``list`` of message dicts, or ``dict`` of lists when ``by_author``.
    """
    q_messages = """SELECT json_build_object('id',id,'posted_at',posted_at,
                                             'content',content, 'author_name', author_name,
                                             'author_id', author_id)
                    FROM messages 
                    WHERE author_id NOT IN (155149108183695360, 349547918966915073, 159985870458322944) """
    # Every fragment starts with a space. Previously the pieces were glued
    # together as "... >= 123AND id <= 456ORDER BY id", which relies on the
    # server splitting a number from the keyword that directly follows it.
    # NOTE(review): the ids are interpolated into the SQL text; only pass
    # trusted integers here.
    if first_id:
        q_messages += " AND id >= " + str(first_id)
    if last_id:
        q_messages += " AND id <= " + str(last_id)

    q_messages += " ORDER BY id"

    cursor = client.query(q_messages)

    messages = {} if by_author else []
    row = cursor.fetchone()
    while row:
        message = row[0]  # the JSON object is the row's only column
        if by_author:
            author_id = str(message['author_id'])
            del message['author_id']
            del message['author_name']
            messages.setdefault(author_id, []).append(message)
        else:
            messages.append(message)
        row = cursor.fetchone()

    return messages

In [12]:
def extract_content(messages):
    """Concatenate the 'content' field of every message into one string.

    Each message's content is prefixed with a single space, so a non-empty
    result starts with a space; an empty iterable yields ''.
    """
    return ''.join(' ' + entry['content'] for entry in messages)

In [13]:
def split_content(content, n):
    """Split a text into chunks of whole words with a size budget of ``n``.

    The text is split on single spaces and words are accumulated greedily.
    Only the characters of the words count towards ``n`` - the separating
    spaces do not - so a returned chunk can be longer than ``n`` characters.
    A single word longer than ``n`` is kept whole in a chunk of its own.
    Runs of whitespace inside a chunk are collapsed to one space and each
    chunk is stripped.

    Args:
        content: The text to split.
        n: Maximum number of word characters per chunk.

    Returns:
        A list with at least one chunk. For empty or whitespace-only input
        the result is ``['']``. No empty chunk is emitted in front of an
        over-long first word (previously ``split_content('abcdef', 3)``
        returned ``['', 'abcdef']``).
    """
    def _normalise(buffer):
        # Raw string: "\s" is an invalid escape in a normal string literal.
        return re.sub(r'\s+', ' ', buffer).strip()

    contents = []
    current = ''
    size = 0
    for w in content.split(' '):
        if size + len(w) <= n:
            current += w + ' '
            size += len(w)
        else:
            chunk = _normalise(current)
            if chunk:  # nothing but whitespace collected so far: skip it
                contents.append(chunk)
            current = w + ' '
            size = len(w)
    # The trailing chunk is always emitted so callers get at least one element.
    contents.append(_normalise(current))

    return contents

In [5]:
def scale_to_one(values):
    """Divide ``values`` by its own ``.sum()`` so the result adds up to one."""
    total = values.sum()
    return values / total

In [14]:
# Fetch the four discussions as flat message lists. Each pair below is the
# (first_id, last_id) message-id range handed to get_messages().
d_trela, d_febo, d_blade, d_radek = (
    get_messages(first_id, last_id)
    for first_id, last_id in (
        (372759366392348683, 372782278138396673),
        (373118183298826243, 373157096436400131),
        (389492652934168576, 389536301671776258),
        (350181842198200321, 350235040942391308),
    )
)

In [15]:
# Size budget passed to split_content() for every chunk.
n = 3000
# Join, clean and chunk each discussion. The names are rebound from lists of
# message dicts to lists of text chunks, so the fetch cell has to be re-run
# before this cell can be run a second time.
d_trela, d_febo, d_blade, d_radek = (
    split_content(clean(extract_content(d_messages)), n)
    for d_messages in (d_trela, d_febo, d_blade, d_radek)
)

In [16]:
# Number of chunks per discussion, one count per line.
print(*map(len, (d_trela, d_febo, d_blade, d_radek)), sep='\n')

5
3
4
4


In [17]:
# Fetch the same four message-id ranges again, this time grouped per author
# (get_messages(..., by_author=True) returns {author_id: [messages]}).
d_trela_a, d_febo_a, d_blade_a, d_radek_a = (
    get_messages(first_id, last_id, by_author=True)
    for first_id, last_id in (
        (372759366392348683, 372782278138396673),
        (373118183298826243, 373157096436400131),
        (389492652934168576, 389536301671776258),
        (350181842198200321, 350235040942391308),
    )
)

In [20]:
from paralleldots import set_api_key, get_api_key, sentiment, similarity, emotion

In [21]:
# Take the ParallelDots API key from the PARALLELDOTS_API_KEY environment
# variable. os.environ.get returns None when the variable is unset, and that
# None is handed to set_api_key as-is.
set_api_key(os.environ.get('PARALLELDOTS_API_KEY'))

In [22]:
# Score every chunk of every discussion with the paralleldots `sentiment` and
# `emotion` calls and keep only the per-class probabilities. The result is a
# list of (discussion name, [one score dict per chunk]) tuples.
discussions = dict(d_trela=d_trela, d_febo=d_febo,
                   d_blade=d_blade, d_radek=d_radek)
feelings = []
for name, d in discussions.items():
    d_feelings = []
    for d_part in d:
        sent = sentiment(d_part)
        emot = emotion(d_part)
        s_probs, e_probs = sent['probabilities'], emot['probabilities']

        # Sentiment classes get an 's_' prefix, emotion classes an 'e_' prefix.
        row = {'s_' + label: s_probs[label]
               for label in ('negative', 'neutral', 'positive')}
        row.update(('e_' + label, e_probs[label])
                   for label in ('angry', 'excited', 'happy', 'indifferent', 'sad'))
        d_feelings.append(row)
    feelings.append((name, d_feelings))
    

In [3]:
# NOTE(review): `feelings_authors` is not assigned in any visible cell of this
# notebook, so on a fresh kernel this line would raise NameError - confirm
# where it comes from or drop the cell.
feelings_authors

[['d_febo_a',
  {'156052059555233792': {'emotion': [{'emotion': 'sad',
      'probabilities': {'angry': 0.374671,
       'excited': 0.020246,
       'happy': 0.008718,
       'indifferent': 0.054402,
       'sad': 0.541963}}],
    'sentiment': [{'probabilities': {'negative': 0.847166,
       'neutral': 0.128604,
       'positive': 0.02423},
      'sentiment': 'negative'}]},
   '242569944821268481': {'emotion': [{'emotion': 'sad',
      'probabilities': {'angry': 0.215411,
       'excited': 0.081702,
       'happy': 0.07362,
       'indifferent': 0.236332,
       'sad': 0.392935}}],
    'sentiment': [{'probabilities': {'negative': 0.404293,
       'neutral': 0.440262,
       'positive': 0.155445},
      'sentiment': 'neutral'}]},
   '269230184643821588': {'emotion': [{'emotion': 'angry',
      'probabilities': {'angry': 0.525988,
       'excited': 0.009746,
       'happy': 0.003878,
       'indifferent': 0.071267,
       'sad': 0.389121}}],
    'sentiment': [{'probabilities': {'negative

In [23]:
# Persist the per-discussion scores as JSON.
# NOTE(review): the load cell further down reads 'discussion_feelings.json',
# not this file name - confirm which file is the intended input.
with open('discussions_feelings_new.json', 'w') as file:
    file.write(json.dumps(feelings))

In [24]:
# Join, clean and chunk the messages of every author in every discussion.
# The result maps discussion name -> {author_id: [text chunks]}.
#
# Fresh dicts are built instead of overwriting the entries of d_trela_a &
# co. in place: the fetched per-author message lists stay untouched, so this
# cell can be re-run without re-fetching (the in-place version raised a
# TypeError on a second run because it was then fed its own chunk strings).
discussions_authors = {
    name: {author: split_content(clean(extract_content(content)), n)
           for author, content in d.items()}
    for name, d in {'d_trela_a': d_trela_a, 'd_febo_a': d_febo_a,
                    'd_blade_a': d_blade_a, 'd_radek_a': d_radek_a}.items()
}



In [25]:
# Inspect the chunked text per author for one discussion.
# NOTE(review): the saved output of this cell contains raw chat text - consider
# clearing it before sharing the notebook.
discussions_authors['d_trela_a']

{'156052059555233792': ["Drama Drama happend Basically Storm accidentally put his dick pic Xan replied with a pic with a cum With a cum Fignore trelabot Ik Funigore trelacki Im back U are mentally disabled. Understandable kjx sir In a pretty way yes Kubi emote I have idea Kubi = MingLee 2⃣ 3⃣ plot twist i get promoted to 99 izno pm basem use VPN germany and dont talk no drama then :gg: or that ah limited nice picture *sigh* *unzips* here we go again yes radek priest so i need to know am i getting demoted or no? i believe the logic behind this is if you are higher level than usual (60+), you are suppose to be nice to people and helping them, etc. not being racist OR hurting their feelings. because if you are 60+ you are trusted with a server control therefore there are people who are trusting you. honestly. radek. NO OFFEENSE PLEASE I STILL LOVE YOU. but you shouldn't be 90 IMHO because you are being mean to basem etc. which is clearly not allowed what if there are other tunisian,iraq, 

In [26]:
# Score every chunk of every author with the paralleldots `sentiment` and
# `emotion` calls. The result is a list of
# (discussion name, {author_id: [one score dict per chunk]}) tuples.
#
# The per-author accumulator is called `chunk_feelings` so that it does not
# overwrite the module-level `feelings` list built by the discussion-level
# scoring cell (re-running that cell's JSON dump afterwards would otherwise
# write a single author's scores).
author_feelings = []
for d_name, d in discussions_authors.items():
    d_authors = {}
    for author, contents in d.items():
        chunk_feelings = []
        for chunk in contents:
            sent = sentiment(chunk)
            emot = emotion(chunk)

            chunk_feelings.append({'s_negative': sent['probabilities']['negative'],
                                   's_neutral': sent['probabilities']['neutral'],
                                   's_positive': sent['probabilities']['positive'],
                                   'e_angry': emot['probabilities']['angry'],
                                   'e_excited': emot['probabilities']['excited'],
                                   'e_happy': emot['probabilities']['happy'],
                                   'e_indifferent': emot['probabilities']['indifferent'],
                                   'e_sad': emot['probabilities']['sad']
                                   })
        d_authors[author] = chunk_feelings
    author_feelings.append((d_name, d_authors))

In [27]:
# Persist the per-author scores as JSON.
# NOTE(review): the load cell further down reads 'd_author_feelings.json',
# not this file name - confirm which file is the intended input.
with open('author_feelings.json', 'w') as file:
    file.write(json.dumps(author_feelings))


In [3]:
# Load the stored scores. `with` closes each file handle once it has been
# parsed; the inline open(...) calls used before were never closed.
# NOTE(review): these file names differ from the ones written by the dump
# cells above ('author_feelings.json', 'discussions_feelings_new.json') -
# confirm these are the intended inputs.
with open('d_author_feelings.json', 'r') as file:
    d_author_feelings = json.load(file)
with open('discussion_feelings.json', 'r') as file:
    discussion_feelings = json.load(file)

In [11]:
# Average the per-chunk probabilities of each discussion, then rescale the
# sentiment averages and the emotion averages so that each group sums to one.
# 'e_excited' is not part of the emotion group: only sad / happy /
# indifferent / angry are averaged and rescaled.
avg_d_feelings = []
for d in discussion_feelings:
    d_name, d_parts = d[0], d[1]
    n_parts = len(d_parts)

    s_means = [sum(part[key] for part in d_parts) / n_parts
               for key in ('s_neutral', 's_positive', 's_negative')]
    e_means = [sum(part[key] for part in d_parts) / n_parts
               for key in ('e_sad', 'e_happy', 'e_indifferent', 'e_angry')]

    avg_s_neutral, avg_s_positive, avg_s_negative = scale_to_one(np.array(s_means))
    avg_e_sad, avg_e_happy, avg_e_indifferent, avg_e_angry = scale_to_one(np.array(e_means))

    avg_d_feelings.append((d_name,
                           avg_s_neutral, avg_s_positive, avg_s_negative,
                           avg_e_sad, avg_e_happy, avg_e_indifferent, avg_e_angry))

In [15]:
# Same aggregation per (discussion, author): average the author's per-chunk
# probabilities, then rescale the sentiment group and the emotion group
# (without 'e_excited') so that each sums to one.
avg_d_a_feelings = []
for d_a in d_author_feelings:
    d_name, authors = d_a[0], d_a[1]
    for author, feelings in authors.items():
        len_feelings = len(feelings)

        s_means = [sum(f[key] for f in feelings) / len_feelings
                   for key in ('s_neutral', 's_positive', 's_negative')]
        e_means = [sum(f[key] for f in feelings) / len_feelings
                   for key in ('e_sad', 'e_happy', 'e_indifferent', 'e_angry')]

        avg_s_neutral, avg_s_positive, avg_s_negative = scale_to_one(np.array(s_means))
        avg_e_sad, avg_e_happy, avg_e_indifferent, avg_e_angry = scale_to_one(np.array(e_means))

        avg_d_a_feelings.append((d_name, author,
                                 avg_s_neutral, avg_s_positive, avg_s_negative,
                                 avg_e_sad, avg_e_happy, avg_e_indifferent, avg_e_angry))

In [16]:
# One row per (discussion, author); column order matches the tuples appended
# to avg_d_a_feelings.
columns = [
    'discussion_id', 'author_id',
    's_neutral', 's_positive', 's_negative',
    'e_sad', 'e_happy', 'e_indifferent', 'e_angry',
]
df_d_a_feelings = pd.DataFrame(data=avg_d_a_feelings, columns=columns)

In [17]:
# Display the per-author averages built from avg_d_a_feelings.
df_d_a_feelings

Unnamed: 0,discussion_id,author_id,s_neutral,s_positive,s_negative,e_sad,e_happy,e_indifferent,e_angry
0,d_blade_a,389496373453520911,0.144572,0.226113,0.629315,0.143382,0.036765,0.375525,0.444328
1,d_blade_a,349468366362247173,0.789211,0.024975,0.185814,0.24307,0.101279,0.563966,0.091684
2,d_blade_a,185846793341370368,0.654655,0.123123,0.222222,0.167982,0.133033,0.497182,0.201804
3,d_blade_a,156052059555233792,0.058,0.134,0.808,0.247202,0.036623,0.28179,0.434385
4,d_blade_a,349641124912168970,0.183816,0.739261,0.076923,0.108559,0.343424,0.321503,0.226514
5,d_blade_a,99247855344025600,0.236,0.685,0.079,0.035753,0.145179,0.783315,0.035753
6,d_blade_a,352526499393568769,0.221221,0.159159,0.61962,0.204782,0.088358,0.316008,0.390852
7,d_blade_a,227488711477166081,0.035,0.082,0.883,0.249495,0.029293,0.140404,0.580808
8,d_blade_a,169801336190271488,0.165,0.01,0.825,0.222787,0.02645,0.316378,0.434385
9,d_blade_a,269230184643821588,0.343,0.092,0.565,0.134796,0.141066,0.556949,0.167189


In [12]:
# One row per discussion; column order matches the tuples appended to
# avg_d_feelings. Note that `columns` is rebound here with a different layout
# than in the per-author cell.
columns = [
    'discussion_name',
    's_neutral', 's_positive', 's_negative',
    'e_sad', 'e_happy', 'e_indifferent', 'e_angry',
]
df_d_feelings = pd.DataFrame(data=avg_d_feelings, columns=columns)

In [13]:
# Display the per-discussion averages built from avg_d_feelings.
df_d_feelings

Unnamed: 0,discussion_name,s_neutral,s_positive,s_negative,e_sad,e_happy,e_indifferent,e_angry
0,d_febo,0.382206,0.223592,0.394202,0.147814,0.091256,0.410132,0.350798
1,d_trela,0.2482,0.0704,0.6814,0.269792,0.071355,0.29488,0.363973
2,d_blade,0.1865,0.06725,0.74625,0.223805,0.035097,0.274669,0.466429
3,d_radek,0.28168,0.213197,0.505124,0.15681,0.130854,0.4327,0.279636


In [18]:
# Export both summary tables as CSV (',' is to_csv's default separator).
# The DataFrame index is written too, as the first, unnamed column.
df_d_a_feelings.to_csv('discussions_author_feelings.csv')
df_d_feelings.to_csv('discussions_feelings.csv')