Skip to content

Commit

Permalink
Implement String.prototype.normalize
Browse files Browse the repository at this point in the history
JerryScript-DCO-1.0-Signed-off-by: Robert Fancsik robert.fancsik@h-lab.eu
  • Loading branch information
Robert Fancsik committed Dec 13, 2021
1 parent 42523bd commit 5a35a80
Show file tree
Hide file tree
Showing 14 changed files with 277 additions and 40 deletions.
8 changes: 8 additions & 0 deletions .github/workflows/gh-actions.yml
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,8 @@ jobs:
Conformance_Tests_ES2015:
runs-on: ubuntu-latest
steps:
- run: sudo apt update
- run: sudo apt install libicu-dev
- uses: actions/checkout@v2
- run: $RUNNER --test262-es2015=update
- run: $RUNNER --test262-es2015=update --build-debug
Expand All @@ -99,6 +101,8 @@ jobs:
Conformance_Tests_ESNext:
runs-on: ubuntu-latest
steps:
- run: sudo apt update
- run: sudo apt install libicu-dev
- uses: actions/checkout@v2
- run: $RUNNER --test262-esnext=update
- uses: actions/upload-artifact@v2
Expand All @@ -111,6 +115,8 @@ jobs:
Conformance_Tests_ESNext_Debug_A:
runs-on: ubuntu-latest
steps:
- run: sudo apt update
- run: sudo apt install libicu-dev
- uses: actions/checkout@v2
- run: $RUNNER --test262-esnext=update --build-debug --test262-test-list=built-ins,annexB,harness,intl402
- uses: actions/upload-artifact@v2
Expand All @@ -123,6 +129,8 @@ jobs:
Conformance_Tests_ESNext_Debug_B:
runs-on: ubuntu-latest
steps:
- run: sudo apt update
- run: sudo apt install libicu-dev
- uses: actions/checkout@v2
- run: $RUNNER --test262-esnext=update --build-debug --test262-test-list=language
- uses: actions/upload-artifact@v2
Expand Down
22 changes: 22 additions & 0 deletions jerry-core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ set(JERRY_ERROR_MESSAGES OFF CACHE BOOL "Enable error mess
set(JERRY_EXTERNAL_CONTEXT OFF CACHE BOOL "Enable external context?")
set(JERRY_PARSER ON CACHE BOOL "Enable javascript-parser?")
set(JERRY_FUNCTION_TO_STRING OFF CACHE BOOL "Enable function toString operation?")
set(JERRY_ICU OFF CACHE BOOL "Enable ICU support?")
set(JERRY_LINE_INFO OFF CACHE BOOL "Enable line info?")
set(JERRY_LOGGING OFF CACHE BOOL "Enable logging?")
set(JERRY_MEM_STATS OFF CACHE BOOL "Enable memory statistics?")
Expand Down Expand Up @@ -78,13 +79,24 @@ if(JERRY_MEM_STATS OR JERRY_PARSER_DUMP_BYTE_CODE OR JERRY_REGEXP_DUMP_BYTE_CODE
set(JERRYRE_LOGGING_MESSAGE " (FORCED BY STATS OR DUMP)")
endif()

# ICU
# Locate the ICU "uc" component only when ICU support was requested.
# The lookup is deliberately not REQUIRED: a missing library is handled below by
# switching JERRY_ICU off and recording the reason for the status output, instead
# of aborting the configuration before that fallback can run.
if(JERRY_ICU)
  find_package(ICU COMPONENTS uc)

  if(NOT ICU_FOUND)
    set(JERRY_ICU OFF)
    set(JERRY_ICU_MESSAGE " (FORCED BY MISSING LIBRARY)")
  endif()
endif()

# Status messages
message(STATUS "JERRY_CPOINTER_32_BIT " ${JERRY_CPOINTER_32_BIT} ${JERRY_CPOINTER_32_BIT_MESSAGE})
message(STATUS "JERRY_DEBUGGER " ${JERRY_DEBUGGER})
message(STATUS "JERRY_ERROR_MESSAGES " ${JERRY_ERROR_MESSAGES})
message(STATUS "JERRY_EXTERNAL_CONTEXT " ${JERRY_EXTERNAL_CONTEXT})
message(STATUS "JERRY_PARSER " ${JERRY_PARSER})
message(STATUS "JERRY_FUNCTION_TO_STRING " ${JERRY_FUNCTION_TO_STRING})
message(STATUS "JERRY_ICU " ${JERRY_ICU} ${JERRY_ICU_MESSAGE})
message(STATUS "JERRY_LINE_INFO " ${JERRY_LINE_INFO})
message(STATUS "JERRY_LOGGING " ${JERRY_LOGGING} ${JERRY_LOGGING_MESSAGE})
message(STATUS "JERRY_MEM_STATS " ${JERRY_MEM_STATS})
Expand Down Expand Up @@ -641,6 +653,12 @@ if(JERRY_VALGRIND)
set(INCLUDE_CORE_PRIVATE ${INCLUDE_CORE_PRIVATE} ${INCLUDE_THIRD_PARTY_VALGRIND})
endif()

# ICU
# Register the JERRY_ICU define and, when ICU is enabled, add the ICU header
# directories to the private include list of the core sources.
jerry_add_define01(JERRY_ICU)
if(JERRY_ICU)
  list(APPEND INCLUDE_CORE_PRIVATE ${ICU_INCLUDE_DIRS})
endif()

# Enable VM execution stop callback
jerry_add_define01(JERRY_VM_HALT)

Expand Down Expand Up @@ -766,6 +784,10 @@ else()
endif()
endif()

# Link the ICU libraries against the ${JERRY_CORE_NAME} target when ICU support is enabled.
# The plain (keyword-less) signature is kept on purpose: the other target_link_libraries
# calls on this target use it, and CMake does not allow mixing both signatures on one target.
if(JERRY_ICU)
target_link_libraries (${JERRY_CORE_NAME} ${ICU_LIBRARIES})
endif()

separate_arguments(EXTERNAL_LINK_LIBS)
foreach(EXT_LIB ${EXTERNAL_LINK_LIBS})
target_link_libraries(${JERRY_CORE_NAME} ${EXT_LIB})
Expand Down
11 changes: 6 additions & 5 deletions jerry-core/ecma/base/ecma-error-messages.inc.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,9 @@ ECMA_ERROR_DEF (ECMA_ERR_INVALID_REGEXP_FLAGS, "Invalid RegExp flags")
#if JERRY_BUILTIN_JSON
ECMA_ERROR_DEF (ECMA_ERR_JSON_STRINGIFY_ERROR, "JSON stringify error")
#endif /* JERRY_BUILTIN_JSON */
#if JERRY_BUILTIN_STRING && JERRY_ESNEXT
ECMA_ERROR_DEF (ECMA_ERR_NORMALIZATION_FAILED, "Normalization failed")
#endif /* JERRY_BUILTIN_STRING && JERRY_ESNEXT */
#if JERRY_BUILTIN_REGEXP
ECMA_ERROR_DEF (ECMA_ERR_STACK_LIMIT_EXCEEDED, "Stack limit exceeded")
#endif /* JERRY_BUILTIN_REGEXP */
Expand Down Expand Up @@ -203,6 +206,9 @@ ECMA_ERROR_DEF (ECMA_ERR_EXPECTED_A_FUNCTION_OBJECT, "Expected a function object
#if JERRY_BUILTIN_TYPEDARRAY
ECMA_ERROR_DEF (ECMA_ERR_INVALID_ARRAYBUFFER_LENGTH, "Invalid ArrayBuffer length")
#endif /* JERRY_BUILTIN_TYPEDARRAY */
#if JERRY_BUILTIN_STRING && JERRY_ESNEXT
ECMA_ERROR_DEF (ECMA_ERR_INVALID_NORMALIZATION_FORM, "Invalid normalization form")
#endif /* JERRY_BUILTIN_STRING && JERRY_ESNEXT */
#if !(JERRY_MODULE_SYSTEM)
ECMA_ERROR_DEF (ECMA_ERR_MODULE_NOT_SUPPORTED, "Module support is disabled")
#endif /* !(JERRY_MODULE_SYSTEM) */
Expand Down Expand Up @@ -547,11 +553,6 @@ ECMA_ERROR_DEF (ECMA_ERR_CONSTRUCTOR_UINT32_ARRAY_REQUIRES_NEW, "Constructor Uin
#if JERRY_ESNEXT
ECMA_ERROR_DEF (ECMA_ERR_GENERATOR_IS_CURRENTLY_UNDER_EXECUTION, "Generator is currently under execution")
ECMA_ERROR_DEF (ECMA_ERR_ITERATOR_RETURN_RESULT_IS_NOT_OBJECT, "Iterator 'return' result is not object")
#endif /* JERRY_ESNEXT */
#if JERRY_BUILTIN_TYPEDARRAY
ECMA_ERROR_DEF (ECMA_ERR_RETURNED_ARRAYBUFFER_HAS_BEEN_DETACHED, "Returned ArrayBuffer has been detached")
#endif /* JERRY_BUILTIN_TYPEDARRAY */
#if JERRY_ESNEXT
ECMA_ERROR_DEF (ECMA_ERR_SEARCH_STRING_CANNOT_BE_OF_TYPE_REGEXP, "Search string can't be of type: RegExp")
ECMA_ERROR_DEF (ECMA_ERR_VALUE_RECEIVED_BY_YIELD_IS_NOT_OBJECT, "Value received by yield* is not object")
#endif /* JERRY_ESNEXT */
Expand Down
3 changes: 2 additions & 1 deletion jerry-core/ecma/base/ecma-error-messages.ini
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,6 @@ ECMA_ERR_RESOLVE_MUST_BE_UNDEFINED = "Resolve must be undefined"
ECMA_ERR_RESULT_OF_DEFAULTVALUE_IS_INVALID = "Result of [[DefaultValue]] is invalid"
ECMA_ERR_RETURN_VALUE_IS_NOT_AN_ARRAYBUFFER_OBJECT = "Return value is not an ArrayBuffer object"
ECMA_ERR_RETURN_VALUE_OF_EXEC_MUST_BE_AN_OBJECT_OR_NULL = "Return value of 'exec' must be an object or null"
ECMA_ERR_RETURNED_ARRAYBUFFER_HAS_BEEN_DETACHED = "Returned ArrayBuffer has been detached"
ECMA_ERR_RIGHT_VALUE_OF_IN_MUST_BE_AN_OBJECT = "Right value of 'in' must be an object"
ECMA_ERR_RIGHT_VALUE_OF_INSTANCEOF_MUST_BE_AN_OBJECT = "Right value of 'instanceof' must be an object"
ECMA_ERR_SEARCH_STRING_CANNOT_BE_OF_TYPE_REGEXP = "Search string can't be of type: RegExp"
Expand Down Expand Up @@ -333,3 +332,5 @@ ECMA_ERR_PRIVATE_METHOD_IS_NOT_WRITABLE = "Private method is not writable"
ECMA_ERR_PRIVATE_FIELD_WAS_DEFINED_WITHOUT_A_SETTER = "Private field was defined without a setter"
ECMA_ERR_CANNOT_READ_PRIVATE_MEMBER_TO_AN_OBJECT_WHOSE_CLASS_DID_NOT_DECLARE_IT = "Cannot read private member to an object whose class did not declare it"
ECMA_ERR_PRIVATE_FIELD_WAS_DEFINED_WITHOUT_A_GETTER = "Private field was defined without a getter"
ECMA_ERR_INVALID_NORMALIZATION_FORM = "Invalid normalization form"
ECMA_ERR_NORMALIZATION_FAILED = "Normalization failed"
52 changes: 52 additions & 0 deletions jerry-core/ecma/base/ecma-helpers-string.c
Original file line number Diff line number Diff line change
Expand Up @@ -2805,6 +2805,58 @@ ecma_op_advance_string_index (ecma_string_t *str_p, /**< input string */
} /* ecma_op_advance_string_index */
#endif /* JERRY_ESNEXT */

#if JERRY_ICU
/**
 * Copy the string's data into a newly allocated UTF16 encoded buffer
 *
 * Note:
 *      The returned buffer is allocated with jmem_heap_alloc_block; the caller owns it and must release it
 *      with jmem_heap_free_block using (*utf16_length_p * sizeof (uint16_t)) as the block size.
 *
 * @return pointer to the allocated buffer
 */
uint16_t *
ecma_string_cesu8_to_utf16 (ecma_string_t *str_p, /**< input string */
                            lit_utf8_size_t *utf16_length_p) /**< [out] number of UTF16 code units written */
{
  lit_utf8_size_t utf8_size;
  lit_utf8_size_t utf8_length;
  uint8_t flags = ECMA_STRING_FLAG_EMPTY;
  const lit_utf8_byte_t *utf8_buffer_p = ecma_string_get_chars (str_p, &utf8_size, &utf8_length, NULL, &flags);
  const lit_utf8_byte_t *utf8_buffer_end_p = utf8_buffer_p + utf8_size;

  /* Iterate with a separate cursor: utf8_buffer_p must keep pointing to the start of the
   * character data, because that address is needed again if the data has to be freed. */
  const lit_utf8_byte_t *utf8_iter_p = utf8_buffer_p;

  *utf16_length_p = utf8_length;
  uint16_t *utf16_buff_p = (uint16_t *) jmem_heap_alloc_block (*utf16_length_p * sizeof (uint16_t));
  uint16_t *utf16_buff_iter_p = utf16_buff_p;

  while (utf8_iter_p < utf8_buffer_end_p)
  {
    *utf16_buff_iter_p++ = (uint16_t) lit_cesu8_read_next (&utf8_iter_p);
  }

  if (flags & ECMA_STRING_FLAG_MUST_BE_FREED)
  {
    /* Free the block by its original start address, not by the advanced cursor. */
    jmem_heap_free_block ((void *) utf8_buffer_p, utf8_size);
  }

  return utf16_buff_p;
} /* ecma_string_cesu8_to_utf16 */

/**
 * Create a new ecma-string from the code units of a UTF16 encoded buffer
 *
 * Note:
 *      Every 16 bit code unit of the buffer is appended to the result on its own.
 *
 * @return pointer to the allocated string
 */
ecma_string_t *
ecma_new_ecma_string_from_utf16 (uint16_t *utf16_buff_p, /**< UTF16 encoded buffer */
                                 lit_utf8_size_t utf16_length) /**< number of code units in the buffer */
{
  ecma_stringbuilder_t result_builder = ecma_stringbuilder_create ();

  for (lit_utf8_size_t unit_idx = 0; unit_idx < utf16_length; unit_idx++)
  {
    ecma_stringbuilder_append_codepoint (&result_builder, utf16_buff_p[unit_idx]);
  }

  return ecma_stringbuilder_finalize (&result_builder);
} /* ecma_new_ecma_string_from_utf16 */
#endif /* JERRY_ICU */

/**
* @}
* @}
Expand Down
4 changes: 4 additions & 0 deletions jerry-core/ecma/base/ecma-helpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,10 @@ ecma_string_t *ecma_new_symbol_from_descriptor_string (ecma_value_t string_desc)
bool ecma_prop_name_is_symbol (ecma_string_t *string_p);
ecma_length_t ecma_op_advance_string_index (ecma_string_t *str_p, ecma_length_t index_num, bool is_unicode);
#endif /* JERRY_ESNEXT */
#if JERRY_ICU
uint16_t *ecma_string_cesu8_to_utf16 (ecma_string_t *str_p, lit_utf8_size_t *utf16_length_p);
ecma_string_t *ecma_new_ecma_string_from_utf16 (uint16_t *utf16_buff_p, lit_utf8_size_t utf16_length);
#endif /* JERRY_ICU */
#if JERRY_BUILTIN_CONTAINER
ecma_string_t *ecma_new_map_key_string (ecma_value_t value);
bool ecma_prop_name_is_map_key (ecma_string_t *string_p);
Expand Down
145 changes: 145 additions & 0 deletions jerry-core/ecma/builtin-objects/ecma-builtin-string-prototype.c
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,10 @@
#include "ecma-regexp-object.h"
#endif /* JERRY_BUILTIN_REGEXP */

#if JERRY_ICU
#include "unicode/unorm2.h"
#endif /* JERRY_ICU */

#if JERRY_BUILTIN_STRING

#define ECMA_BUILTINS_INTERNAL
Expand Down Expand Up @@ -80,6 +84,7 @@ enum

ECMA_STRING_PROTOTYPE_SUBSTR,

ECMA_STRING_PROTOTYPE_NORMALIZE,
ECMA_STRING_PROTOTYPE_REPEAT,
ECMA_STRING_PROTOTYPE_CODE_POINT_AT,
ECMA_STRING_PROTOTYPE_PAD_START,
Expand Down Expand Up @@ -1226,6 +1231,141 @@ ecma_builtin_string_prototype_object_trim (ecma_string_t *original_string_p) /**

#if JERRY_ESNEXT

#if JERRY_ICU
/**
 * ICU string normalizer instance callback
 */
typedef const UNormalizer2 *(*icu_string_normalizer_instance_cb_t) (UErrorCode *);
#endif /* JERRY_ICU */

/**
 * Normalization form descriptor
 */
typedef struct
{
  lit_magic_string_id_t kind; /**< magic string id of the form name */
#if JERRY_ICU
  icu_string_normalizer_instance_cb_t instance_cb; /**< normalizer instance callback */
#endif /* JERRY_ICU */
} icu_string_form_normalizer_t;

/**
 * Helper macro to register form normalizer entries
 *
 * Without ICU only the form name is stored, so the ICU getter passed as the
 * second argument is dropped by the preprocessor and never referenced.
 */
#if JERRY_ICU
#define FORM_ENTRY(id, instance_cb) \
  {                                 \
    id, instance_cb                 \
  }
#else /* !JERRY_ICU */
#define FORM_ENTRY(id, instance_cb) \
  {                                 \
    id                              \
  }
#endif /* JERRY_ICU */

/**
 * List of normalization forms
 */
static const icu_string_form_normalizer_t icu_string_normalize_forms[] = {
  FORM_ENTRY (LIT_MAGIC_STRING_NFC_U, unorm2_getNFCInstance),
  FORM_ENTRY (LIT_MAGIC_STRING_NFD_U, unorm2_getNFDInstance),
  FORM_ENTRY (LIT_MAGIC_STRING_NFKC_U, unorm2_getNFKCInstance),
  FORM_ENTRY (LIT_MAGIC_STRING_NFKD_U, unorm2_getNFKDInstance)
};

#undef FORM_ENTRY

#if JERRY_ICU
/**
 * Normalize a non-empty string with the normalizer returned by the given ICU getter
 *
 * @return ecma value - the normalized string, or a raised TypeError if ICU reports a failure
 *         Returned value must be freed with ecma_free_value.
 */
static ecma_value_t
ecma_builtin_string_prototype_object_normalize_icu (ecma_string_t *original_string_p, /**< input string */
                                                    icu_string_normalizer_instance_cb_t instance_cb) /**< getter */
{
  UErrorCode status = U_ZERO_ERROR;
  const UNormalizer2 *normalizer_p = instance_cb (&status);

  if (U_FAILURE (status))
  {
    return ecma_raise_type_error (ECMA_ERR_NORMALIZATION_FAILED);
  }

  ecma_value_t result = ECMA_VALUE_ERROR;

  lit_utf8_size_t length;
  uint16_t *buffer_p = ecma_string_cesu8_to_utf16 (original_string_p, &length);

  /* First pass with no destination buffer: only the required length is computed,
   * which ICU signals together with U_BUFFER_OVERFLOW_ERROR. */
  int32_t norm_length = unorm2_normalize (normalizer_p, buffer_p, (int32_t) length, NULL, 0, &status);

  if (!U_FAILURE (status) || status == U_BUFFER_OVERFLOW_ERROR)
  {
    /* Remember the allocation size: norm_length is overwritten by the second pass,
     * and the block must be freed with exactly the size it was allocated with. */
    const int32_t norm_capacity = norm_length;
    const size_t norm_buff_size = (uint32_t) norm_capacity * sizeof (uint16_t);
    uint16_t *norm_buff_p = (uint16_t *) jmem_heap_alloc_block (norm_buff_size);

    status = U_ZERO_ERROR;
    norm_length = unorm2_normalize (normalizer_p, buffer_p, (int32_t) length, norm_buff_p, norm_capacity, &status);

    if (!U_FAILURE (status))
    {
      result = ecma_make_string_value (ecma_new_ecma_string_from_utf16 (norm_buff_p, (uint32_t) norm_length));
    }

    jmem_heap_free_block (norm_buff_p, norm_buff_size);
  }

  jmem_heap_free_block (buffer_p, length * sizeof (uint16_t));

  if (ECMA_IS_VALUE_ERROR (result))
  {
    return ecma_raise_type_error (ECMA_ERR_NORMALIZATION_FAILED);
  }

  return result;
} /* ecma_builtin_string_prototype_object_normalize_icu */
#endif /* JERRY_ICU */

/**
 * The String.prototype object's 'normalize' routine
 *
 * See also:
 *          ECMA-262 v12, 22.1.3.13
 *
 * Note:
 *      The form argument is always validated. Without ICU support, and for empty strings,
 *      the original string is returned unchanged.
 *
 * @return ecma value
 *         Returned value must be freed with ecma_free_value.
 */
static ecma_value_t
ecma_builtin_string_prototype_object_normalize (ecma_string_t *original_string_p, /**< this argument */
                                                ecma_value_t form_value) /**< normalization form */
{
#if JERRY_ICU
  /* NFC is the form used when no form argument is given. */
  icu_string_normalizer_instance_cb_t normalizer_instance_cb = unorm2_getNFCInstance;
#endif /* JERRY_ICU */

  if (!ecma_is_value_undefined (form_value))
  {
    ecma_string_t *form_p = ecma_op_to_string (form_value);

    if (JERRY_UNLIKELY (form_p == NULL))
    {
      return ECMA_VALUE_ERROR;
    }

    size_t forms_size = sizeof (icu_string_normalize_forms) / sizeof (icu_string_normalize_forms[0]);
    uint32_t form_idx = 0;

    for (; form_idx < forms_size; form_idx++)
    {
      if (ecma_compare_ecma_string_to_magic_id (form_p, icu_string_normalize_forms[form_idx].kind))
      {
#if JERRY_ICU
        normalizer_instance_cb = icu_string_normalize_forms[form_idx].instance_cb;
#endif /* JERRY_ICU */
        break;
      }
    }

    ecma_deref_ecma_string (form_p);

    /* The loop ran to the end without a match: the form name is not supported. */
    if (form_idx >= forms_size)
    {
      return ecma_raise_range_error (ECMA_ERR_INVALID_NORMALIZATION_FORM);
    }
  }

#if JERRY_ICU
  JERRY_ASSERT (normalizer_instance_cb != NULL);

  if (ecma_string_get_size (original_string_p) != 0)
  {
    return ecma_builtin_string_prototype_object_normalize_icu (original_string_p, normalizer_instance_cb);
  }
#endif /* JERRY_ICU */

  ecma_ref_ecma_string (original_string_p);
  return ecma_make_string_value (original_string_p);
} /* ecma_builtin_string_prototype_object_normalize */

/**
* The String.prototype object's 'repeat' routine
*
Expand Down Expand Up @@ -1570,6 +1710,11 @@ ecma_builtin_string_prototype_dispatch_routine (uint8_t builtin_routine_id, /**<
}
#endif /* JERRY_BUILTIN_ANNEXB */
#if JERRY_ESNEXT
case ECMA_STRING_PROTOTYPE_NORMALIZE:
{
ret_value = ecma_builtin_string_prototype_object_normalize (string_p, arg1);
break;
}
case ECMA_STRING_PROTOTYPE_REPEAT:
{
ret_value = ecma_builtin_string_prototype_object_repeat (string_p, arg1);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ ROUTINE (LIT_MAGIC_STRING_SUBSTR, ECMA_STRING_PROTOTYPE_SUBSTR, 2, 2)
#endif /* JERRY_BUILTIN_ANNEXB */

#if JERRY_ESNEXT
ROUTINE (LIT_MAGIC_STRING_NORMALIZE, ECMA_STRING_PROTOTYPE_NORMALIZE, 1, 0)
ROUTINE (LIT_MAGIC_STRING_REPEAT, ECMA_STRING_PROTOTYPE_REPEAT, 1, 1)
ROUTINE (LIT_MAGIC_STRING_STARTS_WITH, ECMA_STRING_PROTOTYPE_STARTS_WITH, 2, 1)
ROUTINE (LIT_MAGIC_STRING_INCLUDES, ECMA_STRING_PROTOTYPE_INCLUDES, 2, 1)
Expand Down

0 comments on commit 5a35a80

Please sign in to comment.