In [None]:
import glob
import json
import os
import re

import pandas as pd

def get_counts(emoji_list):
    """Tally how many times each item occurs in ``emoji_list``.

    Args:
        emoji_list: Iterable of hashable items (emoji names in this notebook).

    Returns:
        dict mapping each distinct item to its number of occurrences, with
        keys in order of first appearance.
    """
    tally = {}
    for name in emoji_list:
        # Missing keys start from zero, so no separate "first time" branch.
        tally[name] = tally.get(name, 0) + 1
    return tally

def get_emoji_files(channel):
    """Return the paths of the ``*.json`` files directly inside a channel folder.

    Args:
        channel: Name of the sub-directory of ``awakened_zip`` to look in.

    Returns:
        list of path strings, in whatever order ``glob`` yields them; empty
        when the directory does not exist or holds no ``.json`` files.
    """
    return glob.glob(f"awakened_zip/{channel}/*.json")

def get_reactions(messages):
    """Flatten the reaction entries of every message into a single list.

    Args:
        messages: Iterable of message dicts; a message without a
            ``'reactions'`` key contributes nothing.

    Returns:
        list of all reaction entries, in message order.
    """
    return [
        reaction
        for message in messages
        for reaction in message.get('reactions', [])
    ]

def get_emojis_from_messages(messages):
    """Extract the names of all ``:emoji:`` shortcodes found in message text.

    Names are returned *without* the surrounding colons (``'tada'``, not
    ``':tada:'``). The notebook merges this list with the bare ``'name'``
    values from ``get_emojis_from_reactions``, so both sources must use the
    same spelling or one emoji ends up counted under two different keys.

    Args:
        messages: Iterable of message dicts. A missing or empty ``'text'``
            value is treated as an empty string.

    Returns:
        list of emoji names in order of appearance, duplicates kept.
    """
    # '+' is allowed so that shortcodes such as ':+1:' are recognised.
    # NOTE(review): colon-delimited text like '10:30:00' still matches (':30:').
    shortcode = re.compile(r':([a-z0-9_+-]+):')

    all_emojis = []
    for message in messages:
        text = message.get('text') or ''
        # With a single capture group, findall returns just the bare names.
        all_emojis.extend(shortcode.findall(text))
    return all_emojis

def get_emojis_from_reactions(reactions):
    """Return the ``'name'`` of each reaction entry.

    Args:
        reactions: Iterable of reaction dicts, each with a ``'name'`` key.

    Returns:
        list with one name per reaction entry, in input order.
    """
    return [reaction['name'] for reaction in reactions]

def get_messages(emoji_file):
    """Load one JSON export file and keep only the messages that have a user.

    Args:
        emoji_file: Path to a JSON file containing a list of message dicts.

    Returns:
        list of the message dicts that contain a ``'user'`` key.
    """
    # The context manager closes the handle promptly; JSON text is read as
    # UTF-8 explicitly rather than with the platform's default encoding.
    with open(emoji_file, encoding='utf-8') as f:
        messages = json.load(f)

    # Entries without a 'user' field are dropped.
    return [message for message in messages if 'user' in message]

def get_emojis(messages):
    """Collect every emoji used in ``messages``, from text and from reactions.

    Args:
        messages: List of message dicts as returned by ``get_messages``.

    Returns:
        list with the emojis found in message text first, followed by the
        names of the reactions attached to those messages.
    """
    from_text = get_emojis_from_messages(messages)
    from_reactions = get_emojis_from_reactions(get_reactions(messages))
    return from_text + from_reactions

def get_emoji_counts(channel, head=-1):
    """Count every emoji used in a channel's export files.

    Args:
        channel: Channel directory name, passed to ``get_emoji_files``.
        head: Maximum number of files to process. ``None`` or any negative
            value (the default) means "process every file".

    Returns:
        dict mapping each emoji to its number of occurrences across the
        processed files.
    """
    # Sorted so that a positive ``head`` always selects the same files;
    # glob makes no promise about ordering.
    emoji_files = sorted(get_emoji_files(channel))

    # Slicing with the raw -1 default (``files[:-1]``) would silently skip
    # the last file, so the "no limit" sentinel is mapped to ``None``.
    limit = None if head is None or head < 0 else head

    emoji_list = []
    for emoji_file in emoji_files[:limit]:
        messages = get_messages(emoji_file)
        emoji_list.extend(get_emojis(messages))

    return get_counts(emoji_list)

# Time

1829 files total

- First 50 files → 436 ms total
- All 1829 files → 15 s

# Get All Emojis

In [None]:
# Tally emoji usage (message text plus reactions) for a single channel.
emoji_counts = get_emoji_counts(channel='chopping-wood')

# Get Only My Emojis

In [None]:
# ID of the user whose emojis are counted; matched against message['user']
# and against each reaction's 'users' list.
MY_USER_ID = 'U02780B5563'

emoji_files = get_emoji_files('chopping-wood')

emoji_list = []
for emoji_file in emoji_files:
    all_messages = get_messages(emoji_file)

    # Reactions come from *all* messages: my reactions may be attached to
    # messages written by other people.
    all_reactions = get_reactions(all_messages)

    my_messages = [m for m in all_messages if m['user'] == MY_USER_ID]
    my_reactions = [r for r in all_reactions if MY_USER_ID in r['users']]

    # Only text I wrote counts towards my emojis (not everyone's messages).
    message_emojis = get_emojis_from_messages(my_messages)
    reaction_emojis = get_emojis_from_reactions(my_reactions)

    emoji_list.extend(message_emojis + reaction_emojis)

# Count over the list accumulated from every file, not just the last one.
emoji_counts = get_counts(emoji_list)

emoji_counts

# Emoji popularity over all channels

In [None]:
# List the channel folders in pure Python instead of shelling out to `ls`:
# no dependency on GNU-only flags, and a missing `awakened_zip` raises an
# error instead of turning the shell's error text into channel names.
# Hidden directories are skipped, as the `*/` shell glob did.
with os.scandir('awakened_zip') as entries:
    all_channels = sorted(
        entry.name
        for entry in entries
        if entry.is_dir() and not entry.name.startswith('.')
    )

# Get the counts for each channel
all_emoji_counts = {
    channel: get_emoji_counts(channel) for channel in all_channels
}

# Sum up the counts for each emoji across all channels
total_emoji_counts = {}
for channel_counts in all_emoji_counts.values():
    for emoji, count in channel_counts.items():
        total_emoji_counts[emoji] = total_emoji_counts.get(emoji, 0) + count

# Later cells read the totals from `emoji_counts`
emoji_counts = total_emoji_counts

In [None]:
import pandas as pd

# Rank emojis from most to least used, then turn the ranked Series into a
# two-column frame: the index becomes 'emoji_name', the values 'count'.
s = pd.Series(emoji_counts).sort_values(ascending=False)
df = s.rename_axis('emoji_name').reset_index(name='count')

df

In [None]:
# Create the output folder first: to_csv does not create missing
# directories, so a fresh checkout without `data/` would fail here.
os.makedirs('data', exist_ok=True)
df.to_csv('data/emoji-names-and-count.csv', index=False)

In [None]:
# Preview the first lines of the exported CSV with the shell's `head`.
!head data/emoji-names-and-count.csv