# Get `(channel, user, emoji)` counts

### Get `(channel, user, emoji)` counts for messages

In [None]:
from main import get_channels, get_emoji_files, get_messages, get_emojis_from_message
from tqdm import tqdm

# Fetch the list of channels to scan
all_channels = get_channels()

# Tally how many times each (channel, user, emoji) triple occurs in message
# bodies. Keys are (channel, user, emoji) tuples; values are occurrence counts.
message_user_emoji_counts = {}
for channel in tqdm(all_channels, desc='Channels', unit='channel'):
    for emoji_file in get_emoji_files(channel):
        for message in get_messages(emoji_file):
            # Look up the author once per message, before extracting emojis
            user = message['user']
            for emoji in get_emojis_from_message(message):
                key = (channel, user, emoji)
                # Missing keys start at 0, so first sight yields a count of 1
                message_user_emoji_counts[key] = message_user_emoji_counts.get(key, 0) + 1

message_user_emoji_counts

### Get `(channel, user, emoji)` counts for reactions

In [None]:
from main import get_reactions

# Collect every reaction of every message, grouped by channel.
# channel_to_reactions maps channel -> flat list of reaction records.
channel_to_reactions = {}
all_channels = get_channels()
for channel in tqdm(all_channels, desc='Channels', unit='channel'):
    emoji_files = get_emoji_files(channel)
    for emoji_file in emoji_files:
        messages = get_messages(emoji_file)
        for message in messages:
            reactions = get_reactions(message)
            # Accumulate into a list owned by this dict. Storing the returned
            # list itself and later calling .extend() on it would mutate
            # whatever object get_reactions handed back (possibly the
            # message's own reaction list or a shared default), corrupting
            # the source data and risking double counting.
            channel_to_reactions.setdefault(channel, []).extend(reactions)

# Tally (channel, user, emoji) counts: each user listed under a reaction
# counts as one use of that reaction's emoji in that channel.
reactions_user_emoji_counts = {}
for channel, reactions in channel_to_reactions.items():
    for reaction in reactions:
        for user in reaction['users']:
            emoji = reaction['name']
            key = (channel, user, emoji)
            # Missing keys start at 0, so first sight yields a count of 1
            reactions_user_emoji_counts[key] = reactions_user_emoji_counts.get(key, 0) + 1

reactions_user_emoji_counts

### Combine the counts from `message_user_emoji_counts` and `reactions_user_emoji_counts` together

In [None]:
# Merge the message-derived and reaction-derived tallies into one dict,
# summing the counts of any (channel, user, emoji) key present in both.
all_user_emoji_counts = {}
for partial_counts in (message_user_emoji_counts, reactions_user_emoji_counts):
    for key, count in partial_counts.items():
        all_user_emoji_counts[key] = all_user_emoji_counts.get(key, 0) + count

all_user_emoji_counts

In [None]:
import pandas as pd

# Flatten each ((channel, user, emoji), count) entry of all_user_emoji_counts
# into a (channel, user, emoji, count) row
user_emoji_counts_list = [
    (key[0], key[1], key[2], count)
    for key, count in all_user_emoji_counts.items()
]

# Build a long-format dataframe with one row per (channel, user, emoji) triple
df = pd.DataFrame(
    user_emoji_counts_list,
    columns=['channel', 'user', 'emoji_name', 'count'],
)
df

# Add URLs

In [None]:
from urls import add_urls

# Pass the counts dataframe through the project's add_urls helper.
# NOTE(review): later cells select an 'emoji_url' column that is not created
# anywhere in this notebook — presumably add_urls adds it; confirm in urls.py.
df = add_urls(df)
df

### Resolve names

In [None]:
from main import resolve_names

# Run the project's resolve_names helper over the dataframe, then discard
# rows whose 'user' value is NaN and renumber the index from 0.
# NOTE(review): presumably resolve_names maps raw IDs to display names and
# leaves unresolved users as NaN — confirm in main.resolve_names.
df = (
    resolve_names(df)
    .dropna(subset=['user'])
    .reset_index(drop=True)
)
df

# Emoji Type

In [None]:
import pandas as pd

# Read the custom-emoji listing from data/emojis-and-url.csv
custom_emoji_df = pd.read_csv('data/emojis-and-url.csv')
custom_emoji_df

# Collect the emoji names listed in that file into a set for membership tests
custom_emoji_set = set(custom_emoji_df['emoji_name'].values)
custom_emoji_set

# Add an emoji_type column: 'custom' when the name appears in the custom
# set, 'official' otherwise
df['emoji_type'] = df['emoji_name'].map(
    lambda name: 'custom' if name in custom_emoji_set else 'official'
)

# Reorder columns to channel, user, emoji_name, emoji_type, emoji_url, count
column_order = ['channel', 'user', 'emoji_name', 'emoji_type', 'emoji_url', 'count']
df = df[column_order]
df

# Build an emoji_name -> emoji_type lookup dict (NaN values replaced by None
# before de-duplicating the name/type pairs)
emoji_to_type = (
    df[['emoji_name', 'emoji_type']]
    .applymap(lambda x: None if pd.isna(x) else x)
    .drop_duplicates()
    .set_index('emoji_name')['emoji_type']
    .to_dict()
)
emoji_to_type

# Generate JSON

### Get channel counts

In [None]:
# Total count of each emoji per channel (summed over users)
emoji_channel_counts = df.groupby(['emoji_name', 'channel'])['count'].sum().reset_index()

# Build a dict mapping each emoji_name to a list of
# {"name": channel, "count": count} records, ordered by descending count.
#
# The groups are iterated directly instead of going through
# groupby(...).apply(...): apply on a frame that still carries the grouping
# column is deprecated in recent pandas, and on an empty frame it may return
# an empty DataFrame whose .to_dict() is keyed by column name rather than by
# emoji. Iterating always yields a plain {emoji_name: records} mapping
# (an empty dict when there is no data).
channel_records = emoji_channel_counts.rename(columns={'channel': 'name'})

emoji_to_channel_counts_json = {
    emoji_name: sorted(
        group[['name', 'count']].to_dict(orient='records'),
        key=lambda record: record['count'],
        reverse=True,  # sorted() is stable, so ties keep their grouped order
    )
    for emoji_name, group in channel_records.groupby('emoji_name')
}

emoji_to_channel_counts_json

### Get user counts

In [None]:
# Total count of each emoji per user (summed over channels)
emoji_user_counts = df.groupby(['emoji_name', 'user'])['count'].sum().reset_index()

# Build a dict mapping each emoji_name to a list of
# {"name": user, "count": count} records, ordered by descending count.
#
# The groups are iterated directly instead of going through
# groupby(...).apply(...): apply on a frame that still carries the grouping
# column is deprecated in recent pandas, and on an empty frame it may return
# an empty DataFrame whose .to_dict() is keyed by column name rather than by
# emoji. Iterating always yields a plain {emoji_name: records} mapping
# (an empty dict when there is no data).
user_records = emoji_user_counts.rename(columns={'user': 'name'})

emoji_to_user_counts_json = {
    emoji_name: sorted(
        group[['name', 'count']].to_dict(orient='records'),
        key=lambda record: record['count'],
        reverse=True,  # sorted() is stable, so ties keep their grouped order
    )
    for emoji_name, group in user_records.groupby('emoji_name')
}

emoji_to_user_counts_json

### Generate emoji_name: emoji_url dict

In [None]:
# Build an emoji_name -> emoji_url lookup dict. NaN values are replaced by
# None first so that a missing URL is stored as None rather than NaN.
emoji_to_url = (
    df[['emoji_name', 'emoji_url']]
    .applymap(lambda x: None if pd.isna(x) else x)
    .drop_duplicates()
    .set_index('emoji_name')['emoji_url']
    .to_dict()
)
emoji_to_url

In [None]:
# Read in data/short_name_to_emoji.json; the loaded object is later queried
# with .get(emoji_name, '?') to find the value stored for each emoji name.

import json

# Decode explicitly as UTF-8: the file is expected to contain emoji
# characters, and open() otherwise falls back to the platform's default
# encoding, which can fail or mis-decode on non-UTF-8 locales.
with open('data/short_name_to_emoji.json', encoding='utf-8') as f:
    short_name_to_emoji = json.load(f)

short_name_to_emoji

In [None]:
# Assemble the per-emoji lookups built in earlier cells into one dict:
#
# {
#   emoji_name1: {
#     "channels": [
#       {"name": channel1, "count": count1},
#       ...
#     ],
#     "users": [
#       {"name": user1, "count": count1},
#       ...
#     ],
#     "url": url,
#     "type": type,
#     "emoji": value from short_name_to_emoji, or '?' when the name is absent
#   },
#   ...
# }

# Replace NaN cells with None throughout the dataframe
df = df.applymap(lambda x: None if pd.isna(x) else x)

# One entry per distinct emoji name, in order of first appearance in df
emoji_to_channel_and_user_counts_json = {
    name: {
        'channels': emoji_to_channel_counts_json.get(name, []),
        'users': emoji_to_user_counts_json.get(name, []),
        'url': emoji_to_url[name],
        'type': emoji_to_type[name],
        'emoji': short_name_to_emoji.get(name, '?'),
    }
    for name in df['emoji_name'].unique()
}

emoji_to_channel_and_user_counts_json

In [None]:
import json

# Dump the JSON blob to data/emojis-to-channels-and-users.json and properly indent it

# Output path; also interpolated into the shell preview command below
fname = 'data/emojis-to-channels-and-users.json'

# Opening with 'w' overwrites any existing file at this path
with open(fname, 'w') as f:
    json.dump(emoji_to_channel_and_user_counts_json, f, indent=2)

# Preview the first 50 lines of the written file
!head -n 50 $fname

In [None]:
# Copy the generated JSON into the React project's EmojiPage directory
# (presumably where the front-end page loads it from — confirm in ../react).
!cp data/emojis-to-channels-and-users.json ../react/src/pages/EmojiPage