# Load the Unicode emoji dataset

In [1]:
import pandas as pd

# Locations of the inputs/outputs, relative to the notebook's working directory.
# The folder paths keep their trailing "/" because they are joined to file
# names by plain string concatenation.
path_to_dataset = "unicode_emojis_kaggle_df.csv"
path_to_fonts = "fonts/"
processed_data_folder = "processed/"

# Project root on Google Drive; only used when running on Google Colab.
colab_path = "/content/drive/MyDrive/Colab Notebooks/NTU_DA/" #CHANGE THIS

# Detect Colab by probing for its package. Only a failed import means
# "not on Colab": a Drive mount failure (or a user interrupt) must surface
# instead of being silently mistaken for a local run.
try:
    import google.colab
    from google.colab import drive
except ImportError:
    IN_COLAB = False
else:
    drive.mount('/content/drive')
    IN_COLAB = True

if IN_COLAB:
    # Re-root every path under the mounted Drive folder.
    path_to_dataset = colab_path + path_to_dataset
    path_to_fonts = colab_path + path_to_fonts
    processed_data_folder = colab_path + processed_data_folder

database = pd.read_csv(path_to_dataset)

In [2]:
## PILLOW-ONLY VARIANT (disabled)
## Drops the rows whose name mentions a non-default skin tone, because those
## emojis are not supported by PIL ImageDraw. Not needed with Pilmoji.
# skin_tone_labels = ["light skin tone", "medium-light skin tone", "medium skin tone", "medium-dark skin tone", "dark skin tone"]
# emoji_names = database['name'].tolist()

# rows_with_nondefault_tone = []
# for i in range(len(emoji_names)):
#     is_not_default_tone = False
#     for tone_label in skin_tone_labels:
#         if tone_label in emoji_names[i]:
#             is_not_default_tone = True
#     if is_not_default_tone:
#         rows_with_nondefault_tone.append(i)

# database = database.drop(rows_with_nondefault_tone)

# Plain Python list with one entry per row of the 'emoji' column.
all_emojis = database["emoji"].to_list()

# Load the font, measure every emoji's bounding box, and find the maximum width and height

In [3]:
from PIL import Image, ImageDraw, ImageFont
import PIL
import numpy as np


### Load the font

## bit-map based, src: https://github.com/googlefonts/noto-emoji/blob/main/fonts/NotoColorEmoji.ttf
font_name = "NotoColorEmoji.ttf"
font_size = 109 # Cannot be changed

## bit-map based, src: https://github.com/samuelngs/apple-emoji-linux/releases
# font_name = "AppleColorEmoji.ttf"
# font_size = 137 # Cannot be changed

## font from here: https://fontsdata.com/132714/segoeuiemoji.htm (for test only), svg-based, works with different font size
# font_name = "seguiemj.ttf"
# font_size = 50 # Can be changed

unicode_font = ImageFont.truetype(path_to_fonts + font_name, font_size)
sample_count = len(all_emojis)
if sample_count == 0:
    # np.nanmax below would fail with an opaque message on empty arrays.
    raise ValueError("all_emojis is empty: there is nothing to measure")

### Get bboxes

## USING PILLOW
# bboxes = np.zeros((len(all_emojis), 4))
# not_supported_cnt = 0
# for i in range(sample_count):
#     for ii in range(4):
#         bbox = unicode_font.getmask(all_emojis[i]).getbbox()
#         if bbox == None: # if emoji is not supported by the font
#             not_supported_cnt += 1
#             print("Warning::font does not support emoji number {}".format(i))
#             bboxes[i, 0] = np.nan
#             continue
#         bboxes[i, ii] = bbox[ii]
# widths  = bboxes[:, 2] - bboxes[:, 0]
# heights = bboxes[:, 3] - bboxes[:, 1]

## USING PILMOJI
import os

from pilmoji import Pilmoji

widths = np.zeros((sample_count))
heights = np.zeros((sample_count))
# Never incremented on the Pilmoji path; kept because the info file below
# reports `len(all_emojis) - not_supported_cnt`.
not_supported_cnt = 0

# Measuring does not draw anything, so a single 1x1 canvas and a single
# renderer are shared by all emojis; the `with` block closes the renderer
# once the measurements are done.
im = Image.new("RGB", (1, 1), (255, 255, 255))
with Pilmoji(im) as renderer:
    for i in range(sample_count):
        size = renderer.getsize(text=all_emojis[i], font=unicode_font)

        widths[i] = size[0]
        heights[i] = size[1]

# Size of the uniform canvas that can hold every emoji.
max_width  = int(np.nanmax(widths))
max_height = int(np.nanmax(heights))

print(max_width, max_height)


### Print information to a file

# Create the output folder on first use so that writing the file cannot fail
# merely because the folder is missing.
os.makedirs(processed_data_folder, exist_ok=True)
with open(processed_data_folder + 'unicode_emojis_info.txt', 'w') as f:
    print("sample_count:{}\nwidth:{}\nheight:{}".format(len(all_emojis) - not_supported_cnt, max_width, max_height), file=f)

463 109


# Export emojis to images with uniform size

In [None]:
## USING PILLOW
# for i in range(len(all_emojis)):
#     bbox = bboxes[i, :]
#     if not np.isnan(bbox[0]): # Avoid emojis that are not supported by the font
#         im = Image.new("RGB", (max_width, max_height), (255, 255, 255))

#         w_margin = (max_width  - (bbox[2] - bbox[0])) // 2 - bbox[0]
#         h_margin = (max_height - (bbox[3] - bbox[1])) // 2 - bbox[1]

#         ImageDraw.Draw(im).text((w_margin, h_margin), all_emojis[i], font=unicode_font, embedded_color=True)
#         im.save(processed_data_folder + "unicode_emoji_{}.png".format(i))


## USING PILMOJI
# Render each emoji centred on a white canvas of the common
# (max_width, max_height) size and save it as unicode_emoji_<row index>.png.
for i, emoji in enumerate(all_emojis):
    im = Image.new("RGB", (max_width, max_height), (255, 255, 255))

    # One renderer per image, used for both measuring and drawing; the `with`
    # block closes it before the image is written to disk.
    with Pilmoji(im) as renderer:
        size = renderer.getsize(text=emoji, font=unicode_font)

        # Split the leftover space evenly on both sides to centre the emoji.
        w_margin = (max_width  - size[0]) // 2
        h_margin = (max_height - size[1]) // 2

        renderer.text((w_margin, h_margin), emoji, font=unicode_font, embedded_color=True)

    im.save(processed_data_folder + "unicode_emoji_{}.png".format(i))