Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
96 changes: 96 additions & 0 deletions src/libAtomVM/bitstring.c
Original file line number Diff line number Diff line change
Expand Up @@ -76,3 +76,99 @@ bool bitstring_insert_any_integer(uint8_t *dst, avm_int_t offset, avm_int64_t va
}
return true;
}

/*
 * Encode the code point `c` as UTF-8.
 *
 * c        code point to encode; values below 0 or above 0x10FFFF are rejected.
 *          Note that the surrogate range D800-DFFF is not rejected here.
 * buf      destination for the 1 to 4 encoded bytes, or NULL to only compute
 *          the encoded size.
 * out_size receives the encoded size in bytes; may be NULL when the caller
 *          does not need it. Left untouched when the function returns false.
 *
 * Returns true on success, false if `c` is outside 0..0x10FFFF.
 */
bool bitstring_utf8_encode(avm_int_t c, uint8_t *buf, size_t *out_size)
{
    if (c < 0 || c > 0x10FFFF) {
        return false;
    }

    size_t sz;
    if (c < 0x80) {
        // 7 bits: 0xxxxxxx
        if (buf) {
            *buf++ = c;
        }
        sz = 1;
    } else if (c < 0x800) {
        // 11 bits: 110xxxxx 10xxxxxx
        if (buf) {
            *buf++ = (c >> 6) | 0xC0;
            *buf++ = (c & 0x3F) | 0x80;
        }
        sz = 2;
    } else if (c < 0x10000) {
        // 16 bits: 1110xxxx 10xxxxxx 10xxxxxx
        if (buf) {
            *buf++ = (c >> 12) | 0xE0;
            *buf++ = ((c >> 6) & 0x3F) | 0x80;
            *buf++ = (c & 0x3F) | 0x80;
        }
        sz = 3;
    } else {
        // 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        if (buf) {
            *buf++ = (c >> 18) | 0xF0;
            *buf++ = ((c >> 12) & 0x3F) | 0x80;
            *buf++ = ((c >> 6) & 0x3F) | 0x80;
            *buf++ = (c & 0x3F) | 0x80;
        }
        sz = 4;
    }

    // The public contract documents out_size as optional, so honour NULL.
    if (out_size) {
        *out_size = sz;
    }
    return true;
}

/*
 * Encode the code point `c` as UTF-16.
 *
 * c        code point to encode; values below 0 or above 0x10FFFF are rejected.
 * buf      destination for the 2 or 4 encoded bytes, or NULL to only compute
 *          the encoded size.
 * bs_flags byte order selector: little-endian when LittleEndianIntegerMask is
 *          set, big-endian otherwise. Only consulted when buf is not NULL.
 * out_size receives the encoded size in bytes; may be NULL when the caller
 *          does not need it. Left untouched when the function returns false.
 *
 * Returns true on success, false if `c` is outside 0..0x10FFFF.
 */
bool bitstring_utf16_encode(avm_int_t c, uint8_t *buf, enum BitstringFlags bs_flags, size_t *out_size)
{
    if (c < 0 || c > 0x10FFFF) {
        return false;
    }

    size_t sz;
    if (c < 0x10000) {
        // Single 16-bit unit. Code points in the D800-DFFF surrogate range
        // are not filtered out here and are emitted as-is.
        if (buf) {
            if (bs_flags & LittleEndianIntegerMask) {
                *buf++ = c & 0xFF;
                *buf++ = c >> 8;
            } else {
                *buf++ = c >> 8;
                *buf++ = c & 0xFF;
            }
        }
        sz = 2;
    } else {
        if (buf) {
            // Surrogate pair: the 20-bit value (c - 0x10000) is split into a
            // high half (bits 19..10, tagged 0xD800) and a low half
            // (bits 9..0, tagged 0xDC00).
            avm_int_t v = c - 0x10000;
            if (bs_flags & LittleEndianIntegerMask) {
                *buf++ = ((v >> 10) & 0xFF);
                *buf++ = (v >> 18) | 0xD8;
                *buf++ = v & 0xFF;
                *buf++ = ((v >> 8) & 0x03) | 0xDC;
            } else {
                *buf++ = (v >> 18) | 0xD8;
                *buf++ = ((v >> 10) & 0xFF);
                *buf++ = ((v >> 8) & 0x03) | 0xDC;
                *buf++ = v & 0xFF;
            }
        }
        sz = 4;
    }

    // The public contract documents out_size as optional, so honour NULL.
    if (out_size) {
        *out_size = sz;
    }
    return true;
}

/*
 * Encode the code point `c` as UTF-32 (always 4 bytes).
 *
 * c        code point to encode; values below 0 or above 0x10FFFF are rejected.
 * buf      destination for the 4 encoded bytes. Unlike the UTF-8 and UTF-16
 *          encoders there is no size-only mode: buf is written unconditionally
 *          on success and therefore must not be NULL.
 * bs_flags byte order selector: little-endian when LittleEndianIntegerMask is
 *          set, big-endian otherwise.
 *
 * Returns true on success, false if `c` is outside 0..0x10FFFF (in which case
 * buf is not written).
 */
bool bitstring_utf32_encode(avm_int_t c, uint8_t *buf, enum BitstringFlags bs_flags)
{
    if (c < 0 || c > 0x10FFFF) {
        return false;
    }
    if (bs_flags & LittleEndianIntegerMask) {
        *buf++ = c & 0xFF;
        *buf++ = (c >> 8) & 0xFF;
        *buf++ = (c >> 16) & 0xFF;
        *buf++ = c >> 24;
    } else {
        *buf++ = c >> 24;
        *buf++ = (c >> 16) & 0xFF;
        *buf++ = (c >> 8) & 0xFF;
        *buf++ = c & 0xFF;
    }
    return true;
}
113 changes: 113 additions & 0 deletions src/libAtomVM/bitstring.h
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,119 @@ static inline bool bitstring_insert_integer(term dst_bin, size_t offset, avm_int
return bitstring_insert_any_integer((uint8_t *) term_binary_data(dst_bin), offset, value, n, bs_flags);
}

/**
 * @brief Encode a character to UTF-8.
 *
 * @param c character (code point) to encode
 * @param buf the buffer to encode the character to or NULL to only compute the
 * size.
 * @param out_size the size in bytes, on output (if not NULL)
 * @return \c true if encoding was successful, \c false if c is not a valid
 * unicode character
 */
bool bitstring_utf8_encode(avm_int_t c, uint8_t *buf, size_t *out_size);

/**
 * @brief Encode a character to UTF-16.
 *
 * @param c character (code point) to encode
 * @param buf the buffer to encode the character to or NULL to only compute the
 * size.
 * @param bs_flags flags to encode the character (undefined/little/big/native);
 * they select the byte order of the encoded 16-bit units
 * @param out_size the size in bytes, on output (if not NULL)
 * @return \c true if encoding was successful, \c false if c is not a valid
 * unicode character
 */
bool bitstring_utf16_encode(avm_int_t c, uint8_t *buf, enum BitstringFlags bs_flags, size_t *out_size);

/**
 * @brief Encode a character to UTF-32.
 *
 * @details The encoded form is always 4 bytes long, which is why this function
 * has no size output and no size-only mode.
 *
 * @param c character (code point) to encode
 * @param buf the buffer to encode the character to; it is always written on
 * success, so it must have room for 4 bytes
 * @param bs_flags flags to encode the character (undefined/little/big/native);
 * they select the byte order of the encoded value
 * @return \c true if encoding was successful, \c false if c is not a valid
 * unicode character
 */
bool bitstring_utf32_encode(avm_int_t c, uint8_t *buf, enum BitstringFlags bs_flags);

/**
 * @brief Compute how many bytes a character occupies once UTF-8 encoded.
 *
 * @details Runs the UTF-8 encoder in size-only mode (no destination buffer).
 *
 * @param c character to measure
 * @param out_size the size in bytes, on output
 * @return \c true if the character can be encoded, \c false if c is not a
 * valid unicode character
 */
static inline bool bitstring_utf8_size(avm_int_t c, size_t *out_size)
{
    // A NULL destination asks the encoder to report the size without writing.
    uint8_t *size_only = NULL;
    return bitstring_utf8_encode(c, size_only, out_size);
}

/**
 * @brief Compute how many bytes a unicode character occupies once UTF-16
 * encoded.
 *
 * @details Runs the UTF-16 encoder in size-only mode (no destination buffer),
 * passing 0 as flags.
 *
 * @param c character to measure
 * @param out_size the size in bytes, on output
 * @return \c true if the character can be encoded, \c false if c is not a
 * valid unicode character
 */
static inline bool bitstring_utf16_size(avm_int_t c, size_t *out_size)
{
    // A NULL destination asks the encoder to report the size without writing.
    uint8_t *size_only = NULL;
    return bitstring_utf16_encode(c, size_only, 0, out_size);
}

/**
 * @brief Write a character, UTF-8 encoded, into a binary.
 *
 * @param dst_bin binary to insert to
 * @param offset position, in bits, where the character is written; it is
 * converted to a byte index by dropping the low 3 bits
 * @param c character to encode
 * @param out_size the size in bytes, on output
 * @return \c true if encoding was successful, \c false if c is not a valid
 * unicode character
 */
static inline bool bitstring_insert_utf8(term dst_bin, size_t offset, avm_int_t c, size_t *out_size)
{
    // No bounds check here: size was verified by a bs_utf8_size instruction call
    uint8_t *bin_data = (uint8_t *) term_binary_data(dst_bin);
    size_t byte_index = offset >> 3;
    return bitstring_utf8_encode(c, bin_data + byte_index, out_size);
}

/**
 * @brief Write a character, UTF-16 encoded, into a binary.
 *
 * @param dst_bin binary to insert to
 * @param offset position, in bits, where the character is written; it is
 * converted to a byte index by dropping the low 3 bits
 * @param c character to encode
 * @param bs_flags flags to encode the character (undefined/little/big/native)
 * @param out_size the size in bytes, on output
 * @return \c true if encoding was successful, \c false if c is not a valid
 * unicode character
 */
static inline bool bitstring_insert_utf16(term dst_bin, size_t offset, avm_int_t c, enum BitstringFlags bs_flags, size_t *out_size)
{
    // No bounds check here: the size is expected to have been verified
    // beforehand (presumably by a bs_utf16_size instruction call -- TODO confirm)
    uint8_t *bin_data = (uint8_t *) term_binary_data(dst_bin);
    size_t byte_index = offset >> 3;
    return bitstring_utf16_encode(c, bin_data + byte_index, bs_flags, out_size);
}

/**
 * @brief Write a character, UTF-32 encoded, into a binary.
 *
 * @param dst_bin binary to insert to
 * @param offset position, in bits, where the character is written; it is
 * converted to a byte index by dropping the low 3 bits
 * @param c character to encode
 * @param bs_flags flags to encode the character (undefined/little/big/native)
 * @return \c true if encoding was successful, \c false if c is not a valid
 * unicode character
 */
static inline bool bitstring_insert_utf32(term dst_bin, size_t offset, avm_int_t c, enum BitstringFlags bs_flags)
{
    uint8_t *bin_data = (uint8_t *) term_binary_data(dst_bin);
    size_t byte_index = offset >> 3;
    return bitstring_utf32_encode(c, bin_data + byte_index, bs_flags);
}

#ifdef __cplusplus
}
#endif
Expand Down
5 changes: 5 additions & 0 deletions src/libAtomVM/opcodes.h
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,11 @@
#define OP_BS_APPEND 134
#define OP_TRIM 136
#define OP_BS_INIT_BITS 137
// Opcode numbers for the UTF-8/UTF-16/UTF-32 binary construction instructions.
// NOTE(review): semantics inferred from the names; confirm the numeric values
// against the BEAM opcode table.
#define OP_BS_UTF8_SIZE 144
#define OP_BS_PUT_UTF8 145
#define OP_BS_UTF16_SIZE 146
#define OP_BS_PUT_UTF16 147
#define OP_BS_PUT_UTF32 148
#define OP_RECV_MARK 150
#define OP_RECV_SET 151
#define OP_GC_BIF3 152
Expand Down
Loading