# Load the Unicode emoji dataset

In [1]:
import pandas as pd

# File name of the emoji dataset; prefixed with `colab_path` when on Colab.
path_to_dataset = "unicode_emojis_kaggle_df.csv"

# Detect Google Colab by attempting the import. Only a missing package means
# "not running in Colab"; a bare `except:` here would also hide Ctrl-C and
# genuine Drive mount failures, and silently fall back to the local paths.
try:
    import google.colab
    from google.colab import drive
except ImportError:
    IN_COLAB = False
else:
    # Mount errors are intentionally left to propagate: without the mounted
    # Drive, none of the `colab_path`-based locations below can be read.
    drive.mount('/content/drive')
    IN_COLAB = True

colab_path = "/content/drive/MyDrive/Colab Notebooks/NTU_AI/" #CHANGE THIS

# Locations relative to the working directory (or to `colab_path` on Colab).
# They are kept as plain strings with a trailing slash because later cells
# build file names with `+` concatenation.
path_to_fonts = "fonts/"
processed_data_folder = "processed_64x64/"

if IN_COLAB:
    path_to_dataset = colab_path + path_to_dataset
    path_to_fonts = colab_path + path_to_fonts
    processed_data_folder = colab_path + processed_data_folder

# The emojis to render are read from the 'emoji' column of the CSV.
database = pd.read_csv(path_to_dataset)
all_emojis = database['emoji'].tolist()

# Load the font, measure the bounding box of every emoji, and find the maximum width and height

In [2]:
from PIL import Image, ImageDraw, ImageFont
import PIL
import numpy as np


### Load the font
# unicode_font = ImageFont.truetype('arial.ttf', 64)
font_name = "seguiemj.ttf"
font_size = 64
unicode_font = ImageFont.truetype(path_to_fonts + font_name, font_size)

sample_count = len(all_emojis)
if sample_count == 0:
    # np.nanmax below would fail on empty arrays with a far less helpful message.
    raise ValueError("The emoji dataset is empty; there is nothing to measure.")


## USING PILMOJI
from pilmoji import Pilmoji

# Rendered width / height (in pixels) of every emoji, indexed like `all_emojis`.
widths = np.zeros((sample_count))
heights = np.zeros((sample_count))
# Never incremented in this cell; kept so the sample count written to the
# info file is computed the same way as before.
not_supported_cnt = 0

# Only sizes are measured here, so a single 1x1 scratch image and a single
# Pilmoji instance are shared by the whole loop. The `with` block closes the
# Pilmoji instance once measuring is done instead of leaking one per emoji.
with Pilmoji(Image.new("RGB", (1, 1), (255, 255, 255))) as measurer:
    for i, emoji in enumerate(all_emojis):
        widths[i], heights[i] = measurer.getsize(text=emoji, font=unicode_font)

max_width  = int(np.nanmax(widths))
max_height = int(np.nanmax(heights))

print(max_width, max_height)
max_width = 64 #Overwrite width to be 64


### Print information to a file

# Create the output folder up front so the write below (and the image export
# that follows) does not fail when it does not exist yet.
import os
os.makedirs(processed_data_folder, exist_ok=True)

with open(processed_data_folder + 'unicode_emojis_info.txt', 'w', encoding='utf-8') as f:
    print("sample_count:{}\nwidth:{}\nheight:{}".format(len(all_emojis) - not_supported_cnt, max_width, max_height), file=f)

377 64


# Export emojis to images with uniform size

In [3]:
## USING PILMOJI
# Render every emoji centred on a white (max_width x max_height) canvas and
# save it as unicode_emoji_<index>.png inside `processed_data_folder`.
for i, emoji in enumerate(all_emojis):
    im = Image.new("RGB", (max_width, max_height), (255, 255, 255))

    # One Pilmoji per image, used for both measuring and drawing, and closed
    # by the `with` block as soon as the emoji has been drawn.
    with Pilmoji(im) as pilmoji:
        size = pilmoji.getsize(text=emoji, font=unicode_font)

        # Centre the rendered text on the canvas. If the text is larger than
        # the canvas the margin becomes negative, i.e. drawing starts outside
        # the image on that axis.
        w_margin = (max_width  - size[0]) // 2
        h_margin = (max_height - size[1]) // 2

        # font = ImageFont.truetype('arial.ttf', 24)
        pilmoji.text((w_margin, h_margin), emoji, font=unicode_font, embedded_color=True)

    im.save(processed_data_folder + "unicode_emoji_{}.png".format(i))