In [33]:
import json
import os
import re
from collections import ChainMap
from datetime import datetime

import emoji
import pandas as pd

from db.client import PGClient

In [None]:
# Module-level database client; get_messages() and get_discussions() run their queries through it.
client = PGClient()

In [2]:
# One compiled regex that matches any known Unicode emoji; clean() uses it to
# strip emojis from message text. Whitespace inside a key is removed first.
#
# Alternatives are ordered longest first: regex alternation accepts the first
# branch that matches, so a multi-code-point emoji has to be tried before any
# shorter emoji that is a prefix of it, otherwise the trailing code points are
# left in the text. Empty alternatives are dropped because an empty branch
# would match everywhere and shadow every branch after it.
# NOTE(review): this assumes emoji.UNICODE_EMOJI maps emoji strings to names,
# as in old releases of the emoji package — confirm the installed version.
uni_emojis_list = sorted(
    {key for key in (''.join(x.split()) for x in emoji.UNICODE_EMOJI.keys()) if key},
    key=lambda s: (-len(s), s),
)
uni_emojis_pt = re.compile('|'.join(re.escape(p) for p in uni_emojis_list))

def clean(c):
    """Strip URLs, mention tags, emojis and double quotes from a message.

    The input is converted with ``str()`` and then processed in this order:
    URLs are removed, ``<@id>`` / ``<@!id>`` mention tags are removed,
    newlines become spaces, everything matched by the module-level
    ``uni_emojis_pt`` is removed, double quotes are removed and finally every
    ``<:name:id>`` custom emoji tag is removed.

    Args:
        c: Message content; any object accepted by ``str()``.

    Returns:
        The cleaned text with leading and trailing whitespace stripped.
    """
    # Raw strings hand the backslashes to the regex engine unchanged instead of
    # relying on Python keeping unknown escape sequences such as "\(" or "\w".
    url_pt = r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
    custom_emoji_pt = r'<:\w+:[0-9]+>'
    tag_pt = r'<@!?[0-9]+>'

    c = str(c)
    c = re.sub(url_pt, '', c)
    c = re.sub(tag_pt, '', c)
    c = c.replace('\n', ' ')
    c = uni_emojis_pt.sub('', c)
    c = c.replace('"', '')

    # Each tag found is removed as literal text, so the matched string is never
    # re-interpreted as a regular expression.
    for e in re.findall(custom_emoji_pt, c):
        c = c.replace(e, '')

    return c.strip()

In [3]:
def get_messages(first_id=None, last_id=None, by_author=False):
    """Fetch messages ordered by id, optionally restricted to an id range.

    Messages written by three hard-coded author ids are always excluded by
    the query. The query is run through the module-level ``client``.

    Args:
        first_id: Lowest message id to include (inclusive). ``None`` means no
            lower bound.
        last_id: Highest message id to include (inclusive). ``None`` means no
            upper bound.
        by_author: When true, group the messages by author instead of
            returning a flat list.

    Returns:
        If ``by_author`` is false, a list of message dicts with the keys
        ``id``, ``posted_at``, ``content``, ``author_name`` and ``author_id``.
        If ``by_author`` is true, a dict mapping ``str(author_id)`` to the
        list of that author's messages, each with ``author_id`` and
        ``author_name`` removed.

    Raises:
        ValueError, TypeError: If a given bound cannot be converted to ``int``.
    """
    q_messages = """SELECT json_build_object('id',id,'posted_at',posted_at,
                                             'content',content, 'author_name', author_name,
                                             'author_id', author_id)
                    FROM messages
                    WHERE author_id NOT IN (155149108183695360, 349547918966915073, 159985870458322944)"""

    # Every appended clause starts with a space; without it the id literal runs
    # straight into the next keyword ("...683AND id <= ...673ORDER BY id").
    # int() makes sure only a number can ever be spliced into the SQL text.
    if first_id is not None:
        q_messages += " AND id >= " + str(int(first_id))
    if last_id is not None:
        q_messages += " AND id <= " + str(int(last_id))
    q_messages += " ORDER BY id"

    cursor = client.query(q_messages)

    messages = {} if by_author else []
    row = cursor.fetchone()
    while row:
        # Each row has a single column holding the JSON object built by the query.
        message = row[0]
        if by_author:
            author_id = str(message['author_id'])
            del message['author_id']
            del message['author_name']
            messages.setdefault(author_id, []).append(message)
        else:
            messages.append(message)
        row = cursor.fetchone()

    return messages

In [4]:
def get_discussions(min_char_len=250, max_time_diff=30):
    """Group all messages into discussions and return the text of the long ones.

    Messages are read in id order through the module-level ``client`` and
    their content is passed through ``clean()``. A new discussion starts
    whenever more than ``max_time_diff`` minutes passed since the previous
    message. Messages whose cleaned content is empty are left out of the
    text, but their timestamp still counts as the "previous message".

    Args:
        min_char_len: A discussion is returned only if its joined text is
            longer than this many characters.
        max_time_diff: Largest gap, in minutes, between two consecutive
            messages of the same discussion.

    Returns:
        A list of strings, one per retained discussion. Each string is the
        discussion's cleaned messages joined by single spaces, with one
        leading space. Empty when the query returns no rows.
    """
    q_messages = """SELECT json_build_object('id',id,'posted_at',posted_at,'content',content, 'author_name', author_name) FROM messages
                    WHERE author_id NOT IN (155149108183695360, 349547918966915073, 159985870458322944)
                    ORDER BY id
                    """

    cursor = client.query(q_messages)
    message = cursor.fetchone()
    discussions = [[]]
    # Stays None until the first row is seen, so an empty result set does not
    # crash on indexing a missing row.
    previous_message = None

    while message:
        message = message[0]
        content = clean(message['content'])
        message_date = datetime.strptime(message['posted_at'], '%Y-%m-%dT%H:%M:%S')
        if previous_message is None:
            # The very first message has a gap of zero to itself.
            previous_message = message_date
        if content:
            message['content'] = content
            time_diff = (message_date - previous_message).total_seconds() / 60
            if time_diff > max_time_diff:
                discussions.append([message])
            else:
                discussions[-1].append(message)
        previous_message = message_date
        message = cursor.fetchone()

    contents = []
    for d in discussions:
        if not d:
            continue
        text = ' '.join(m['content'] for m in d)
        # min_char_len is a character threshold, so it is compared with the
        # length of the joined text rather than with the number of messages.
        if len(text) > min_char_len:
            contents.append(' ' + text)

    return contents

In [5]:
def extract_content(messages):
    """Concatenate the ``content`` of every message into one string.

    Args:
        messages: Iterable of mappings that each have a ``'content'`` string.

    Returns:
        All contents in order, each one preceded by a single space (so a
        non-empty result starts with a space); ``''`` for no messages.
    """
    return ''.join(' ' + message['content'] for message in messages)

In [6]:
def split_content(content, n):
    """Split text into chunks of whole words.

    Words are the pieces of ``content`` between single spaces. Words are
    added to the current chunk while the sum of their lengths stays at or
    below ``n``; the separating spaces are not counted, so a returned chunk
    can be longer than ``n`` characters. A single word longer than ``n`` is
    never broken up and forms a chunk of its own.

    Args:
        content: Text to split.
        n: Maximum number of word characters per chunk.

    Returns:
        List of chunks with whitespace runs collapsed to one space and outer
        whitespace stripped. Chunks that end up empty are dropped, so empty
        or blank input yields ``[]``.
    """
    def as_chunk(words):
        # Collapse any whitespace run to one space and trim the ends.
        return re.sub(r'\s+', ' ', ' '.join(words)).strip()

    contents = []
    words = []
    size = 0  # characters in `words`, separators excluded
    for w in content.split(' '):
        if size + len(w) <= n:
            words.append(w)
            size += len(w)
        else:
            contents.append(as_chunk(words))
            words = [w]
            size = len(w)
    contents.append(as_chunk(words))

    # An over-long first word or blank input would otherwise produce '' chunks,
    # which callers would pass on as empty texts.
    return [chunk for chunk in contents if chunk]

In [7]:
# Load four discussions, each delimited by the ids of its first and last message.
d_trela, d_febo, d_blade, d_radek = (
    get_messages(first_id, last_id)
    for first_id, last_id in (
        (372759366392348683, 372782278138396673),
        (373118183298826243, 373157096436400131),
        (389492652934168576, 389536301671776258),
        (350181842198200321, 350235040942391308),
    )
)

In [8]:
# Chunk size handed to split_content().
n = 3000

# Turn each discussion into cleaned text chunks. Every name is rebound from a
# list of message dicts to a list of strings, so this cell only works once
# after the loading cell has been run.
d_trela, d_febo, d_blade, d_radek = (
    split_content(clean(extract_content(messages)), n)
    for messages in (d_trela, d_febo, d_blade, d_radek)
)

In [9]:
# Number of chunks per discussion, one per line.
print(*map(len, (d_trela, d_febo, d_blade, d_radek)), sep='\n')

5
3
4
4


In [57]:
# Same four id ranges as before, this time grouped by author.
d_trela_a, d_febo_a, d_blade_a, d_radek_a = (
    get_messages(first_id, last_id, by_author=True)
    for first_id, last_id in (
        (372759366392348683, 372782278138396673),
        (373118183298826243, 373157096436400131),
        (389492652934168576, 389536301671776258),
        (350181842198200321, 350235040942391308),
    )
)

In [54]:
# Number of distinct authors per discussion, one per line.
print(
    *(len(by_author.keys()) for by_author in (d_trela_a, d_febo_a, d_blade_a, d_radek_a)),
    sep='\n',
)

12
10
10
9


In [19]:
# Length of the first entry stored for this author in d_radek_a.
# NOTE(review): the saved result (1721) suggests the entry was already a text
# chunk, i.e. the in-place split cell further down had been run first — confirm.
len(d_radek_a['156052059555233792'][0])

1721

In [5]:
# Put d_contents into a DataFrame and copy it to the clipboard without index or header.
# NOTE(review): d_contents is not defined in any cell visible in this notebook —
# confirm where it comes from before relying on this cell.
df = pd.DataFrame(d_contents)
df.to_clipboard(index=False, header=False)

In [20]:
from paralleldots import set_api_key, get_api_key, sentiment, similarity, emotion

In [21]:
# The ParallelDots API key is read from the environment so the secret is not
# stored in the notebook; set PARALLELDOTS_API_KEY before starting the kernel.
# A key that was previously committed in plain text should be revoked.
set_api_key(os.environ["PARALLELDOTS_API_KEY"])


In [38]:
# Run sentiment and emotion analysis on every text chunk of each discussion.
# The result is a list of (discussion name, per-chunk results) tuples.
discussions = {'d_trela': d_trela, 'd_febo': d_febo,
               'd_blade': d_blade, 'd_radek': d_radek}
sentiments = []
for name, d in discussions.items():
    d_sentiments = []
    for d_part in d:
        sent = sentiment(d_part)
        emot = emotion(d_part)
        # Drop the terms-of-use notice; pop() with a default keeps the loop
        # running when a response (for instance an error payload) has no
        # 'usage' key.
        sent.pop('usage', None)
        emot.pop('usage', None)
        d_sentiments.append({'sentiment': sent, 'emotion': emot})
    sentiments.append((name, d_sentiments))
    

In [40]:
# Persist the per-discussion results; json.dump writes each (name, results)
# tuple as a JSON array.
with open('discussions_sentiments.json', 'w') as file:
    json.dump(sentiments, file)

In [78]:
# NOTE(review): the saved output below still contains 'usage' keys and no
# discussion names, so it appears to come from an earlier version of the loop
# that fills `sentiments` — re-run to refresh it.
sentiments

[[{'probabilities': {'negative': 0.036359,
    'neutral': 0.849968,
    'positive': 0.113673},
   'sentiment': 'neutral',
   'usage': 'By accessing ParallelDots API or using information generated by ParallelDots API, you are agreeing to be bound by the ParallelDots API Terms of Use: http://www.paralleldots.com/terms-and-conditions'},
  {'probabilities': {'negative': 0.967067,
    'neutral': 0.029135,
    'positive': 0.003799},
   'sentiment': 'negative',
   'usage': 'By accessing ParallelDots API or using information generated by ParallelDots API, you are agreeing to be bound by the ParallelDots API Terms of Use: http://www.paralleldots.com/terms-and-conditions'},
  {'probabilities': {'negative': 0.458571,
    'neutral': 0.51089,
    'positive': 0.030539},
   'sentiment': 'neutral',
   'usage': 'By accessing ParallelDots API or using information generated by ParallelDots API, you are agreeing to be bound by the ParallelDots API Terms of Use: http://www.paralleldots.com/terms-and-condit

In [47]:
# NOTE(review): discussions_authors is only assigned in a cell further down;
# on a fresh kernel run top to bottom this line raises NameError.
discussions_authors

[{'156052059555233792': ["Drama Drama happend Basically Storm accidentally put his dick pic Xan replied with a pic with a cum With a cum Fignore trelabot Ik Funigore trelacki Im back U are mentally disabled. Understandable kjx sir In a pretty way yes Kubi emote I have idea Kubi = MingLee 2⃣ 3⃣ plot twist i get promoted to 99 izno pm basem use VPN germany and dont talk no drama then :gg: or that ah limited nice picture *sigh* *unzips* here we go again yes radek priest so i need to know am i getting demoted or no? i believe the logic behind this is if you are higher level than usual (60+), you are suppose to be nice to people and helping them, etc. not being racist OR hurting their feelings. because if you are 60+ you are trusted with a server control therefore there are people who are trusting you. honestly. radek. NO OFFEENSE PLEASE I STILL LOVE YOU. but you shouldn't be 90 IMHO because you are being mean to basem etc. which is clearly not allowed what if there are other tunisian,iraq,

In [51]:
# Per-author data of the trela discussion.
# NOTE(review): the saved output shows text chunks rather than message dicts,
# so it was presumably captured after the in-place split cell further down.
d_trela_a

{'156052059555233792': ["Drama Drama happend Basically Storm accidentally put his dick pic Xan replied with a pic with a cum With a cum Fignore trelabot Ik Funigore trelacki Im back U are mentally disabled. Understandable kjx sir In a pretty way yes Kubi emote I have idea Kubi = MingLee 2⃣ 3⃣ plot twist i get promoted to 99 izno pm basem use VPN germany and dont talk no drama then :gg: or that ah limited nice picture *sigh* *unzips* here we go again yes radek priest so i need to know am i getting demoted or no? i believe the logic behind this is if you are higher level than usual (60+), you are suppose to be nice to people and helping them, etc. not being racist OR hurting their feelings. because if you are 60+ you are trusted with a server control therefore there are people who are trusting you. honestly. radek. NO OFFEENSE PLEASE I STILL LOVE YOU. but you shouldn't be 90 IMHO because you are being mean to basem etc. which is clearly not allowed what if there are other tunisian,iraq, 

In [58]:
discussions_authors = {
    'd_trela_a': d_trela_a,
    'd_febo_a': d_febo_a,
    'd_blade_a': d_blade_a,
    'd_radek_a': d_radek_a,
}

# Replace every author's message list with cleaned text chunks. The dicts are
# modified in place, so d_trela_a and the others change as well, and running
# this cell a second time fails because the values are no longer message dicts.
for name, d in discussions_authors.items():
    for author in list(d):
        content = d[author]
        d[author] = split_content(clean(extract_content(content)), n)



In [59]:
# Per-author text chunks of the trela discussion.
discussions_authors['d_trela_a']

{'156052059555233792': ["Drama Drama happend Basically Storm accidentally put his dick pic Xan replied with a pic with a cum With a cum Fignore trelabot Ik Funigore trelacki Im back U are mentally disabled. Understandable kjx sir In a pretty way yes Kubi emote I have idea Kubi = MingLee 2⃣ 3⃣ plot twist i get promoted to 99 izno pm basem use VPN germany and dont talk no drama then :gg: or that ah limited nice picture *sigh* *unzips* here we go again yes radek priest so i need to know am i getting demoted or no? i believe the logic behind this is if you are higher level than usual (60+), you are suppose to be nice to people and helping them, etc. not being racist OR hurting their feelings. because if you are 60+ you are trusted with a server control therefore there are people who are trusting you. honestly. radek. NO OFFEENSE PLEASE I STILL LOVE YOU. but you shouldn't be 90 IMHO because you are being mean to basem etc. which is clearly not allowed what if there are other tunisian,iraq, 

In [62]:
# Sentiment and emotion per author: for every discussion, map each author id
# to the results of all of that author's text chunks. The outcome is a list of
# (discussion name, {author id: results}) tuples.
sentiments_authors2 = []
for d_name, d in discussions_authors.items():
    d_authors = {}
    for author, contents in d.items():
        sentiments_author = {'sentiment': [], 'emotion': []}
        for c in contents:
            sent = sentiment(c)
            emot = emotion(c)
            # Drop the terms-of-use notice; pop() with a default does not
            # raise when a response has no 'usage' key.
            sent.pop('usage', None)
            emot.pop('usage', None)
            sentiments_author['sentiment'].append(sent)
            sentiments_author['emotion'].append(emot)
        d_authors[author] = sentiments_author
    sentiments_authors2.append((d_name, d_authors))

In [63]:
# NOTE(review): the saved output below still contains 'usage' keys, which the
# loop that builds sentiments_authors2 removes — the output looks stale.
sentiments_authors2

[('d_blade_a',
  {'156052059555233792': {'emotion': [{'emotion': 'sad',
      'probabilities': {'angry': 0.344047,
       'excited': 0.024107,
       'happy': 0.013658,
       'indifferent': 0.186718,
       'sad': 0.43147},
      'usage': 'By accessing ParallelDots API or using information generated by ParallelDots API, you are agreeing to be bound by the ParallelDots API Terms of Use: http://www.paralleldots.com/terms-and-conditions'}],
    'sentiment': [{'probabilities': {'negative': 0.737774,
       'neutral': 0.243069,
       'positive': 0.019157},
      'sentiment': 'negative',
      'usage': 'By accessing ParallelDots API or using information generated by ParallelDots API, you are agreeing to be bound by the ParallelDots API Terms of Use: http://www.paralleldots.com/terms-and-conditions'}]},
   '169801336190271488': {'emotion': [{'emotion': 'sad',
      'probabilities': {'angry': 0.33811,
       'excited': 0.011835,
       'happy': 0.012285,
       'indifferent': 0.079835,
     

In [34]:
# Persist the per-author results. The list built above is named
# sentiments_authors2; no cell in this notebook assigns `sentiments_authors`.
with open('authors_sentiments.json', 'w') as file:
    json.dump(sentiments_authors2, file)
