flux-framework · mergify · May 17, 2023 · May 16, 2023 · May 16, 2023 · May 16, 2023
diff --git a/doc/man1/flux-jobs.rst b/doc/man1/flux-jobs.rst
@@ -274,6 +274,9 @@ The field names that can be specified are:
 **id.words**
   job ID in mnemonic encoding
 
+**id.emoji**
+  job ID in emoji encoding
+
 **userid**
    job submitter's userid
 

diff --git a/src/bindings/python/flux/job/JobID.py b/src/bindings/python/flux/job/JobID.py
@@ -45,6 +45,7 @@ class JobID(int):
      - dotted hex (dothex) (xxxx.xxxx.xxxx.xxxx)
      - kvs dir (dotted hex with `job.` prefix)
      - RFC19 F58: (Base58 encoding with prefix `ƒ` or `f`)
+     - basemoji (emoji encoding)
 
     A JobID object also has properties for encoding a JOBID into each
     of the above representations, e.g. jobid.f85, jobid.words, jobid.dothex...
@@ -92,6 +93,11 @@ def words(self):
         """Return words (mnemonic) representation of a JobID"""
         return self.encode("words")
 
+    @property
+    def emoji(self):
+        """Return the emoji (basemoji) representation of a JobID"""
+        return self.encode("emoji")
+
     @property
     def kvs(self):
         """Return KVS directory path of a JobID"""

diff --git a/src/bindings/python/flux/job/info.py b/src/bindings/python/flux/job/info.py
@@ -605,6 +605,7 @@ def job_fields_to_attrs(fields):
         "id.dec": (),
         "id.hex": (),
         "id.f58": (),
+        "id.emoji": (),
         "id.kvs": (),
         "id.words": (),
         "id.dothex": (),
@@ -697,6 +698,7 @@ class JobInfoFormat(flux.util.OutputFormat):
         "id.dec": "JOBID",
         "id.hex": "JOBID",
         "id.f58": "JOBID",
+        "id.emoji": "JOBID",
         "id.kvs": "JOBID",
         "id.words": "JOBID",
         "id.dothex": "JOBID",

diff --git a/src/common/libjob/id.c b/src/common/libjob/id.c
@@ -88,6 +88,8 @@ int flux_job_id_encode (flux_jobid_t id,
         t = FLUID_STRING_MNEMONIC;
     else if (strcasecmp (type, "f58") == 0)
         t = FLUID_STRING_F58;
+    else if (strcasecmp (type, "emoji") == 0)
+        t = FLUID_STRING_EMOJI;
     else {
         /*  Return EPROTO for invalid type to differentiate from
          *   other invalid arguments.

diff --git a/src/common/libjob/test/job.c b/src/common/libjob/test/job.c
@@ -369,6 +369,7 @@ struct jobid_parse_test jobid_parse_tests[] = {
     { "dothex", 0,     "0000.0000.0000.0000" },
     { "kvs",    0,     "job.0000.0000.0000.0000" },
     { "words",  0,     "academy-academy-academy--academy-academy-academy" },
+    { "emoji",  0,     "😃" },
 #if ASSUME_BROKEN_LOCALE
     { "f58",    0,     "f1" },
 #else
@@ -380,6 +381,7 @@ struct jobid_parse_test jobid_parse_tests[] = {
     { "dothex", 1,     "0000.0000.0000.0001" },
     { "kvs",    1,     "job.0000.0000.0000.0001" },
     { "words",  1,     "acrobat-academy-academy--academy-academy-academy" },
+    { "emoji",  1,     "😄" },
 #if ASSUME_BROKEN_LOCALE
     { "f58",    1,     "f2" },
 #else
@@ -391,6 +393,7 @@ struct jobid_parse_test jobid_parse_tests[] = {
     { "dothex", 65535, "0000.0000.0000.ffff" },
     { "kvs",    65535, "job.0000.0000.0000.ffff" },
     { "words",  65535, "nevada-archive-academy--academy-academy-academy" },
+    { "emoji",  65535, "💁📚" },
 #if ASSUME_BROKEN_LOCALE
     { "f58",    65535, "fLUv" },
 #else
@@ -402,6 +405,7 @@ struct jobid_parse_test jobid_parse_tests[] = {
     { "dothex", 6787342413402046, "0018.1d0d.4d85.0fbe" },
     { "kvs",    6787342413402046, "job.0018.1d0d.4d85.0fbe" },
     { "words",  6787342413402046, "cake-plume-nepal--neuron-pencil-academy" },
+    { "emoji",  6787342413402046, "👴😱🔚🎮🕙🚩" },
 #if ASSUME_BROKEN_LOCALE
     { "f58",    6787342413402046, "fuzzybunny" },
 #else

diff --git a/src/common/libutil/Makefile.am b/src/common/libutil/Makefile.am
@@ -98,7 +98,9 @@ libutil_la_SOURCES = \
 	slice.c \
 	slice.h \
 	strstrip.c \
-	strstrip.h
+	strstrip.h \
+	basemoji.h \
+	basemoji.c
 
 EXTRA_DIST = veb_mach.c
 
@@ -132,7 +134,8 @@ TESTS = test_sha1.t \
 	test_strstrip.t \
 	test_slice.t \
 	test_timestamp.t \
-	test_environment.t
+	test_environment.t \
+	test_basemoji.t
 
 test_ldadd = \
 	$(top_builddir)/src/common/libutil/libutil.la \
@@ -283,3 +286,7 @@ test_timestamp_t_LDADD = $(test_ldadd)
 test_environment_t_SOURCES = test/environment.c
 test_environment_t_CPPFLAGS = $(test_cppflags)
 test_environment_t_LDADD = $(test_ldadd)
+
+test_basemoji_t_SOURCES = test/basemoji.c
+test_basemoji_t_CPPFLAGS = $(test_cppflags)
+test_basemoji_t_LDADD = $(test_ldadd)
diff --git a/src/common/libutil/basemoji.c b/src/common/libutil/basemoji.c
@@ -0,0 +1,228 @@
+/************************************************************\
+ * Copyright 2023 Lawrence Livermore National Security, LLC
+ * (c.f. AUTHORS, NOTICE.LLNS, COPYING)
+ *
+ * This file is part of the Flux resource manager framework.
+ * For details, see https://github.com/flux-framework.
+ *
+ * SPDX-License-Identifier: LGPL-3.0
+\************************************************************/
+
+/* basemoji.c - an emoji encoding for unsigned 64 bit integers
+ */
+
+#if HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <errno.h>
+#include <stdbool.h>
+
+#include "ccan/array_size/array_size.h"
+#include "basemoji.h"
+
+/* Minimum length of a basemoji (base-576) string is 1 emoji, or 4 bytes.
+ * This is a byte count as returned by strlen(3), i.e. it does not
+ * include the terminating NUL.
+ */
+#define BASEMOJI_MINLEN 4
+
+/* Maximum number of emoji "digits" in a basemoji string is
+ *
+ *  ceil (ln (2^64-1)/ln (576)) = 7
+ *
+ * 4 bytes per emoji, so 4*7 = 28 bytes (again not counting the
+ * terminating NUL).
+ */
+#define BASEMOJI_MAXLEN 28
+
+/*  The following is a selection of 576 emoji in CLDR[1] collation order[2]
+ *  taken from the version 2010 Unicode emoji set[3]. Note: the selected code
+ *  points are all encoded as 4 bytes in UTF-8, which is assumed by the
+ *  implementation in this module. Additionally, every character in this
+ *  selected set shares the same first two bytes, F0 9F, in UTF-8 encoding,
+ *  which aids in detection of a valid basemoji string.
+ *
+ *  1. https://cldr.unicode.org
+ *  2. https://unicode.org/emoji/charts-12.1/emoji-ordering.txt
+ *  3. https://unicode.org/emoji/charts/emoji-versions.html
+ *
+ */
+const char *emojis[] = {
+"😃", "😄", "😁", "😆", "😅", "😂", "😉", "😊", "😍", "😘", "😚", "😋",
+"😜", "😝", "😏", "😒", "😌", "😔", "😪", "😷", "😵", "😲", "😳", "😨",
+"😰", "😥", "😢", "😭", "😱", "😖", "😣", "😞", "😓", "😩", "😫", "😤",
+"😡", "😠", "👿", "💀", "💩", "👹", "👺", "👻", "👽", "👾", "😺", "😸",
+"😹", "😻", "😼", "😽", "🙀", "😿", "😾", "🙈", "🙉", "🙊", "💌", "💘",
+"💝", "💖", "💗", "💓", "💞", "💕", "💟", "💔", "💛", "💚", "💙", "💜",
+"💋", "💯", "💢", "💥", "💫", "💦", "💨", "💬", "💤", "👋", "👌", "👈",
+"👉", "👆", "👇", "👍", "👎", "👊", "👏", "🙌", "👐", "🙏", "💅", "💪",
+"👂", "👃", "👀", "👅", "👄", "👶", "👦", "👧", "👱", "👨", "👩", "👴",
+"👵", "🙍", "🙎", "🙅", "🙆", "💁", "🙋", "🙇", "👮", "💂", "👷", "👸",
+"👳", "👲", "👰", "👼", "🎅", "💆", "💇", "🚶", "🏃", "💃", "👯", "🏂",
+"🏄", "🏊", "🛀", "👫", "💏", "💑", "👪", "👤", "👣", "🐵", "🐒", "🐶",
+"🐩", "🐺", "🐱", "🐯", "🐴", "🐎", "🐮", "🐷", "🐗", "🐽", "🐑", "🐫",
+"🐘", "🐭", "🐹", "🐰", "🐻", "🐨", "🐼", "🐾", "🐔", "🐣", "🐤", "🐥",
+"🐦", "🐧", "🐸", "🐢", "🐍", "🐲", "🐳", "🐬", "🐟", "🐠", "🐡", "🐙",
+"🐚", "🐌", "🐛", "🐜", "🐝", "🐞", "💐", "🌸", "💮", "🌹", "🌺", "🌻",
+"🌼", "🌷", "🌱", "🌴", "🌵", "🌾", "🌿", "🍀", "🍁", "🍂", "🍃", "🍄",
+"🍇", "🍈", "🍉", "🍊", "🍌", "🍍", "🍎", "🍏", "🍑", "🍒", "🍓", "🍅",
+"🍆", "🌽", "🌰", "🍞", "🍖", "🍗", "🍔", "🍟", "🍕", "🍳", "🍲", "🍱",
+"🍘", "🍙", "🍚", "🍛", "🍜", "🍝", "🍠", "🍢", "🍣", "🍤", "🍥", "🍡",
+"🍦", "🍧", "🍨", "🍩", "🍪", "🎂", "🍰", "🍫", "🍬", "🍭", "🍮", "🍯",
+"🍵", "🍶", "🍷", "🍸", "🍹", "🍺", "🍻", "🍴", "🔪", "🌏", "🗾", "🌋",
+"🗻", "🏠", "🏡", "🏢", "🏣", "🏥", "🏦", "🏨", "🏩", "🏪", "🏫", "🏬",
+"🏭", "🏯", "🏰", "💒", "🗼", "🗽", "🌁", "🌃", "🌄", "🌅", "🌆", "🌇",
+"🌉", "🎠", "🎡", "🎢", "💈", "🎪", "🚃", "🚄", "🚅", "🚇", "🚉", "🚌",
+"🚑", "🚒", "🚓", "🚕", "🚗", "🚙", "🚚", "🚲", "🚏", "🚨", "🚥", "🚧",
+"🚤", "🚢", "💺", "🚀", "🕛", "🕐", "🕑", "🕒", "🕓", "🕔", "🕕", "🕖",
+"🕗", "🕘", "🕙", "🕚", "🌑", "🌓", "🌔", "🌕", "🌙", "🌛", "🌟", "🌠",
+"🌌", "🌀", "🌈", "🌂", "🔥", "💧", "🌊", "🎃", "🎄", "🎆", "🎇", "🎈",
+"🎉", "🎊", "🎋", "🎍", "🎎", "🎏", "🎐", "🎑", "🎀", "🎁", "🎫", "🏆",
+"🏀", "🏈", "🎾", "🎳", "🎣", "🎽", "🎿", "🎯", "🔫", "🎱", "🔮", "🎮",
+"🎰", "🎲", "🃏", "🀄", "🎴", "🎭", "🎨", "👓", "👔", "👕", "👖", "👗",
+"👘", "👙", "👚", "👛", "👜", "👝", "🎒", "👞", "👟", "👠", "👡", "👢",
+"👑", "👒", "🎩", "🎓", "💄", "💍", "💎", "🔊", "📢", "📣", "🔔", "🎼",
+"🎵", "🎶", "🎤", "🎧", "📻", "🎷", "🎸", "🎹", "🎺", "🎻", "📱", "📲",
+"📞", "📟", "📠", "🔋", "🔌", "💻", "💽", "💾", "💿", "📀", "🎥", "🎬",
+"📺", "📷", "📹", "📼", "🔍", "🔎", "💡", "🔦", "🏮", "📔", "📕", "📖",
+"📗", "📘", "📙", "📚", "📓", "📒", "📃", "📜", "📄", "📰", "📑", "🔖",
+"💰", "💴", "💵", "💸", "💳", "💹", "📧", "📨", "📩", "📤", "📥", "📦",
+"📫", "📪", "📮", "📝", "💼", "📁", "📂", "📅", "📆", "📇", "📈", "📉",
+"📊", "📋", "📌", "📍", "📎", "📏", "📐", "🔒", "🔓", "🔏", "🔐", "🔑",
+"🔨", "💣", "🔧", "🔩", "🔗", "📡", "💉", "💊", "🚪", "🚽", "🚬", "🗿",
+"🏧", "🚹", "🚺", "🚻", "🚼", "🚾", "🚫", "🚭", "🔞", "🔃", "🔙", "🔚",
+"🔛", "🔜", "🔝", "🔯", "🔼", "🔽", "🎦", "📶", "📳", "📴", "💱", "💲",
+"🔱", "📛", "🔰", "🔟", "🔠", "🔡", "🔢", "🔣", "🔤", "🆎", "🆑", "🆒",
+"🆓", "🆔", "🆕", "🆖", "🆗", "🆘", "🆙", "🆚", "🈁", "🈶", "🈯", "🉐",
+"🈹", "🈚", "🈲", "🉑", "🈸", "🈴", "🈳", "🈺", "🈵", "🔴", "🔵", "🔶",
+"🔷", "🔸", "🔹", "🔺", "🔻", "💠", "🔘", "🔳", "🔲", "🏁", "🚩", "🎌",
+};
+
+/* Return true if 's' could plausibly be a basemoji string.
+ *
+ * This is a cheap pre-filter, not a full validation: it only checks that
+ * the byte length is a multiple of 4 between BASEMOJI_MINLEN and
+ * BASEMOJI_MAXLEN, and that the first two bytes are F0 9F. Strings that
+ * pass this test may still be rejected by uint64_basemoji_decode().
+ *
+ * A NULL 's' returns false.
+ */
+bool is_basemoji_string (const char *s)
+{
+    size_t len;
+
+    /* This code assumes length of emoji array is 576
+     * Generate error at build time if this becomes untrue:
+     */
+    BUILD_ASSERT(ARRAY_SIZE(emojis) == 576);
+
+    if (s == NULL)
+        return false;
+
+    /* Keep the length as size_t: narrowing the strlen() result to int
+     * could make an enormous string appear to have a small length.
+     */
+    len = strlen (s);
+
+    /* Check for expected length of a basemoji string, and if the
+     * first two bytes match the expected UTF-8 encoding.
+     * This doesn't guarantee that `s` is a valid basemoji string,
+     * but this will catch most obvious cases and other invalid strings
+     * are left to be detected in decode.
+     */
+    return (len >= BASEMOJI_MINLEN
+            && len <= BASEMOJI_MAXLEN
+            && len % 4 == 0
+            && (uint8_t)s[0] == 0xf0
+            && (uint8_t)s[1] == 0x9f);
+}
+
+/* Encode id into buf in reverse, i.e. the least significant base-576
+ * digit is written first, since digits are produced by repeated
+ * division. The caller is expected to reverse the 4-byte groups.
+ *
+ * 'buf' is zero filled first, and at least one byte is always left
+ * untouched at the end, so the result is NUL terminated.
+ *
+ * Returns the number of bytes written (a multiple of 4, at least 4),
+ * or -1 if 'buf' is NULL or too small to hold the encoding plus a
+ * terminating NUL.
+ */
+static int emoji_revenc (char *buf, int buflen, uint64_t id)
+{
+    int index = 0;
+
+    if (buf == NULL || buflen <= 0)
+        return -1;
+    memset (buf, 0, buflen);
+
+    /* do/while so that id == 0 still emits one digit (emojis[0]) */
+    do {
+        /* Need 4 bytes for this emoji plus 1 for the trailing NUL */
+        if (index + 4 >= buflen)
+            return -1;
+        memcpy (buf + index, emojis[id % 576], 4);
+        index += 4;
+        id /= 576;
+    } while (id > 0);
+
+    return index;
+}
+
+/* Write the basemoji encoding of 'id' into 'buf' as a NUL terminated
+ * string, most significant emoji first.
+ *
+ * Returns 0 on success. On failure returns -1 with errno set to EINVAL
+ * (bad arguments or encoding failure) or EOVERFLOW ('buflen' is too
+ * small to hold the encoded bytes plus a terminating NUL).
+ */
+int uint64_basemoji_encode (uint64_t id, char *buf, int buflen)
+{
+    char scratch[BASEMOJI_MAXLEN+1];
+    int nbytes;
+    int remaining;
+    int out = 0;
+
+    if (!buf || buflen <= 0)
+        goto inval;
+
+    /* First pass produces the emoji in reverse order into a scratch
+     * buffer, and tells us how many bytes the encoding occupies.
+     */
+    nbytes = emoji_revenc (scratch, sizeof (scratch), id);
+    if (nbytes < 0)
+        goto inval;
+
+    /* The caller's buffer must hold nbytes of emoji plus a NUL */
+    if (buflen < nbytes + 1) {
+        errno = EOVERFLOW;
+        return -1;
+    }
+
+    memset (buf, 0, buflen);
+
+    /* Walk the scratch buffer from its last 4-byte emoji back to its
+     * first, appending each to 'buf', so the most significant digit
+     * ends up on the left.
+     */
+    remaining = nbytes;
+    while (remaining > 0) {
+        remaining -= 4;
+        memcpy (buf + out, scratch + remaining, 4);
+        out += 4;
+    }
+    return 0;
+inval:
+    errno = EINVAL;
+    return -1;
+}
+
+
+/* Find the 4-byte emoji at 'c' in the emojis table.
+ *
+ * On a match, store the table index (the value of that digit) in
+ * '*result' and return 0. Otherwise return -1 with errno set to EINVAL.
+ * Exactly 4 bytes starting at 'c' are compared.
+ */
+static int basemoji_lookup (const char *c, int *result)
+{
+    int digit = 0;
+
+    while (digit < 576) {
+        if (!memcmp (emojis[digit], c, 4)) {
+            *result = digit;
+            return 0;
+        }
+        digit++;
+    }
+    errno = EINVAL;
+    return -1;
+}
+
+/* Decode the basemoji string 'str' into '*idp'.
+ *
+ * Returns 0 on success. Returns -1 with errno set to EINVAL if an
+ * argument is NULL, if 'str' fails is_basemoji_string(), if any 4-byte
+ * group is not in the emoji table, or if the encoded value does not
+ * fit in 64 bits. '*idp' is only written on success.
+ */
+int uint64_basemoji_decode (const char *str, uint64_t *idp)
+{
+    uint64_t id = 0;
+    uint64_t scale = 1;
+    int len;
+
+    if (str == NULL
+        || idp == NULL
+        || !is_basemoji_string (str)) {
+        errno = EINVAL;
+        return -1;
+    }
+
+    /* Move through basemoji string in reverse since least significant
+     * bits are at the end. Since all emoji are 4 bytes, start at 4 from
+     * the end to point to the final emoji.
+     */
+    len = strlen (str);
+    for (int i = len - 4; i >= 0; i -= 4) {
+        int c;
+        if (basemoji_lookup (str+i, &c) < 0) {
+            errno = EINVAL;
+            return -1;
+        }
+        /* Seven base-576 digits can express values above UINT64_MAX
+         * (576^7 > 2^64). Reject those instead of letting the sum wrap
+         * silently to an unrelated id.
+         */
+        if ((uint64_t)c > (UINT64_MAX - id) / scale) {
+            errno = EINVAL;
+            return -1;
+        }
+        id += c * scale;
+        /* Only advance scale when another digit follows.
+         * is_basemoji_string() limits input to 7 digits, so scale never
+         * exceeds 576^6 and cannot itself overflow.
+         */
+        if (i >= 4)
+            scale *= 576;
+    }
+    *idp = id;
+    return 0;
+}
diff --git a/src/common/libutil/basemoji.h b/src/common/libutil/basemoji.h
@@ -0,0 +1,46 @@
+/************************************************************\
+ * Copyright 2023 Lawrence Livermore National Security, LLC
+ * (c.f. AUTHORS, NOTICE.LLNS, COPYING)
+ *
+ * This file is part of the Flux resource manager framework.
+ * For details, see https://github.com/flux-framework.
+ *
+ * SPDX-License-Identifier: LGPL-3.0
+\************************************************************/
+
+#ifndef _UTIL_BASEMOJI_H
+#define _UTIL_BASEMOJI_H
+
+#include <stdint.h>
+#include <stdbool.h>
+
+/*  basemoji - an implementation of the RFC 19 FLUID emoji encoding
+ */
+
+/* Convert a 64 bit unsigned integer to basemoji, placing the result
+ * in buffer 'buf' of size 'buflen'.
+ *
+ * Returns 0 on success, -1 on failure with errno set:
+ * EINVAL: Invalid arguments
+ * EOVERFLOW: buffer too small for encoded string
+ */
+int uint64_basemoji_encode (uint64_t id, char *buf, int buflen);
+
+/* Decode a string in basemoji to an unsigned 64 bit integer.
+ *
+ * Returns 0 on success, -1 on failure with errno set:
+ * EINVAL: Invalid arguments
+ */
+int uint64_basemoji_decode (const char *str, uint64_t *idp);
+
+/*  Return true if 's' could be a basemoji string, i.e. it falls
+ *  within the minimum and maximum lengths, and starts with the
+ *  expected bytes.
+ */
+bool is_basemoji_string (const char *s);
+
+#endif /* !_UTIL_BASEMOJI_H */
+
+/*
+ * vi:tabstop=4 shiftwidth=4 expandtab
+ */