Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement String.prototype.search, and some minor regexp refactors. #504

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Expand Up @@ -67,30 +67,13 @@ ecma_builtin_regexp_prototype_exec (ecma_value_t this_arg, /**< this argument */
{
ECMA_TRY_CATCH (obj_this, ecma_op_to_object (this_arg), ret_value);

ecma_object_t *obj_p = ecma_get_object_from_value (obj_this);
ecma_property_t *bytecode_prop_p = ecma_get_internal_property (obj_p, ECMA_INTERNAL_PROPERTY_REGEXP_BYTECODE);
re_bytecode_t *bytecode_p = ECMA_GET_POINTER (re_bytecode_t, bytecode_prop_p->u.internal_property.value);

ECMA_TRY_CATCH (input_str_value,
ecma_op_to_string (arg),
ret_value);

ecma_string_t *input_str_p = ecma_get_string_from_value (input_str_value);

/* Convert ecma_String_t *to regexp_bytecode_t* */
lit_utf8_size_t input_str_size = ecma_string_get_size (input_str_p);

MEM_DEFINE_LOCAL_ARRAY (input_utf8_buffer_p, input_str_size, lit_utf8_byte_t);

ecma_string_to_utf8_string (input_str_p, input_utf8_buffer_p, (ssize_t) input_str_size);
lit_utf8_iterator_t iter = lit_utf8_iterator_create (input_utf8_buffer_p, input_str_size);

ret_value = ecma_regexp_exec_helper (obj_p, bytecode_p, &iter);

MEM_FINALIZE_LOCAL_ARRAY (input_utf8_buffer_p);
ret_value = ecma_regexp_exec_helper (obj_this, input_str_value, false);

ECMA_FINALIZE (input_str_value);

ECMA_FINALIZE (obj_this);
}

Expand Down
110 changes: 91 additions & 19 deletions jerry-core/ecma/builtin-objects/ecma-builtin-string-prototype.cpp
Expand Up @@ -31,6 +31,10 @@
#include "jrt-libc-includes.h"
#include "lit-char-helpers.h"

#ifndef CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN
#include "ecma-regexp-object.h"
#endif /* !CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */

#ifndef CONFIG_ECMA_COMPACT_PROFILE_DISABLE_STRING_BUILTIN

#define ECMA_BUILTINS_INTERNAL
Expand Down Expand Up @@ -560,15 +564,10 @@ ecma_builtin_string_prototype_object_match (ecma_value_t this_arg, /**< this arg

JERRY_ASSERT (ecma_is_value_boolean (global_value));

ecma_value_t exec_arguments[1] = { this_to_string_value };

if (!ecma_is_value_true (global_value))
{
/* 7. */
ret_value = ecma_builtin_regexp_prototype_dispatch_routine (LIT_MAGIC_STRING_EXEC,
regexp_value,
exec_arguments,
1);
ret_value = ecma_regexp_exec_helper (regexp_value, this_to_string_value, false);
}
else
{
Expand Down Expand Up @@ -608,10 +607,7 @@ ecma_builtin_string_prototype_object_match (ecma_value_t this_arg, /**< this arg
{
/* 8.f.i. */
ECMA_TRY_CATCH (exec_value,
ecma_builtin_regexp_prototype_dispatch_routine (LIT_MAGIC_STRING_EXEC,
regexp_value,
exec_arguments,
1),
ecma_regexp_exec_helper (regexp_value, this_to_string_value, false),
ret_value);

if (ecma_is_value_null (exec_value))
Expand Down Expand Up @@ -829,13 +825,10 @@ ecma_builtin_string_prototype_object_replace_match (ecma_builtin_replace_search_

if (context_p->is_regexp)
{
ecma_value_t exec_arguments[1] = { context_p->input_string };

ECMA_TRY_CATCH (match_value,
ecma_builtin_regexp_prototype_dispatch_routine (LIT_MAGIC_STRING_EXEC,
context_p->regexp_or_search_string,
exec_arguments,
1),
ecma_regexp_exec_helper (context_p->regexp_or_search_string,
context_p->input_string,
false),
ret_value);

if (!ecma_is_value_null (match_value))
Expand Down Expand Up @@ -1504,7 +1497,6 @@ ecma_builtin_string_prototype_object_replace (ecma_value_t this_arg, /**< this a

return ret_value;
} /* ecma_builtin_string_prototype_object_replace */
#endif /* !CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */

/**
* The String.prototype object's 'search' routine
Expand All @@ -1517,11 +1509,91 @@ ecma_builtin_string_prototype_object_replace (ecma_value_t this_arg, /**< this a
*/
static ecma_completion_value_t
ecma_builtin_string_prototype_object_search (ecma_value_t this_arg, /**< this argument */
ecma_value_t arg) /**< routine's argument */
ecma_value_t regexp_arg) /**< pattern: a RegExp object, or a value handed to the RegExp constructor */
{
/*
 * NOTE(review): this listing looks like a rendered diff with removed and added
 * lines interleaved. The 'arg' parameter line above and the
 * ECMA_BUILTIN_CP_UNIMPLEMENTED line below appear to be the removed side
 * (the rest of the body only uses 'regexp_arg') -- confirm against the merged file.
 */
ECMA_BUILTIN_CP_UNIMPLEMENTED (this_arg, arg);
/* Result of the routine; it stays empty until a step stores a completion in it. */
ecma_completion_value_t ret_value = ecma_make_empty_completion_value ();

/* 1. Check that the 'this' value is object-coercible. */
ECMA_TRY_CATCH (check_coercible_value,
ecma_op_check_object_coercible (this_arg),
ret_value);

/* 2. Convert the 'this' value to the string that will be searched. */
ECMA_TRY_CATCH (to_string_value,
ecma_op_to_string (this_arg),
ret_value);

/* Placeholder until step 3 or 4 stores a copy of the RegExp object to use. */
ecma_value_t regexp_value = ecma_make_simple_value (ECMA_SIMPLE_VALUE_EMPTY);

/* 3. The argument is already an object of class "RegExp": use a copy of it. */
if (ecma_is_value_object (regexp_arg)
&& ecma_object_get_class_name (ecma_get_object_from_value (regexp_arg)) == LIT_MAGIC_STRING_REGEXP_UL)
{
regexp_value = ecma_copy_value (regexp_arg, true);
}
else
{
/* 4. Otherwise construct a RegExp from the argument and use a copy of the result. */
ecma_value_t regexp_arguments[1] = { regexp_arg };

ECMA_TRY_CATCH (new_regexp_value,
ecma_builtin_regexp_dispatch_construct (regexp_arguments, 1),
ret_value);

regexp_value = ecma_copy_value (new_regexp_value, true);

ECMA_FINALIZE (new_regexp_value);
}

/* 5. Run the search only if no earlier step stored a completion in ret_value. */
if (ecma_is_completion_value_empty (ret_value))
{
/*
 * The final 'true' is the helper's ignore_global argument: the helper clears
 * RE_FLAG_GLOBAL from its flags, and it only reads or writes 'lastIndex'
 * when that flag is set.
 */
ECMA_TRY_CATCH (match_result,
ecma_regexp_exec_helper (regexp_value, to_string_value, true),
ret_value);

/* Value returned when the helper reports no match (null result). */
ecma_number_t offset = -1;

if (!ecma_is_value_null (match_result))
{
JERRY_ASSERT (ecma_is_value_object (match_result));

/* Read the 'index' property of the match object; it becomes the returned offset. */
ecma_object_t *match_object_p = ecma_get_object_from_value (match_result);
ecma_string_t *index_string_p = ecma_get_magic_string (LIT_MAGIC_STRING_INDEX);

ECMA_TRY_CATCH (index_value,
ecma_op_object_get (match_object_p, index_string_p),
ret_value);

JERRY_ASSERT (ecma_is_value_number (index_value));

offset = *ecma_get_number_from_value (index_value);

ECMA_FINALIZE (index_value);
ecma_deref_ecma_string (index_string_p);
}

/* Wrap the offset in a number value unless reading 'index' stored a completion. */
if (ecma_is_completion_value_empty (ret_value))
{
ecma_number_t *offset_number_p = ecma_alloc_number ();
*offset_number_p = offset;

ret_value = ecma_make_normal_completion_value (ecma_make_number_value (offset_number_p));
}

ECMA_FINALIZE (match_result);
/* Release the copy taken in step 3 or 4. */
ecma_free_value (regexp_value, true);
}

ECMA_FINALIZE (to_string_value);
ECMA_FINALIZE (check_coercible_value);

/* 6. Return the completion built above. */
return ret_value;
} /* ecma_builtin_string_prototype_object_search */

#endif /* !CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */

/**
* The String.prototype object's 'slice' routine
*
Expand Down
Expand Up @@ -71,9 +71,9 @@ ROUTINE (LIT_MAGIC_STRING_LOCALE_COMPARE_UL, ecma_builtin_string_prototype_objec
/*
 * String.prototype routines whose implementations call into the RegExp code
 * (ecma_regexp_exec_helper); they are left out when
 * CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN is defined.
 *
 * NOTE(review): in this listing 'search' is also registered right after this
 * guard; that looks like the removed side of a rendered diff -- confirm that
 * only one registration remains in the merged file.
 */
#ifndef CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN
ROUTINE (LIT_MAGIC_STRING_MATCH, ecma_builtin_string_prototype_object_match, 1, 1)
ROUTINE (LIT_MAGIC_STRING_REPLACE, ecma_builtin_string_prototype_object_replace, 2, 2)
ROUTINE (LIT_MAGIC_STRING_SEARCH, ecma_builtin_string_prototype_object_search, 1, 1)
#endif /* !CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */

ROUTINE (LIT_MAGIC_STRING_SEARCH, ecma_builtin_string_prototype_object_search, 1, 1)
ROUTINE (LIT_MAGIC_STRING_SPLIT, ecma_builtin_string_prototype_object_split, 2, 2)
ROUTINE (LIT_MAGIC_STRING_SUBSTRING, ecma_builtin_string_prototype_object_substring, 2, 2)
ROUTINE (LIT_MAGIC_STRING_TO_LOWER_CASE_UL, ecma_builtin_string_prototype_object_to_lower_case, 0, 0)
Expand Down
79 changes: 55 additions & 24 deletions jerry-core/ecma/operations/ecma-regexp-object.cpp
Expand Up @@ -1176,19 +1176,45 @@ re_set_result_array_properties (ecma_object_t *array_obj_p, /**< result array */
* Returned value must be freed with ecma_free_completion_value
*/
ecma_completion_value_t
ecma_regexp_exec_helper (ecma_object_t *obj_p, /**< RegExp object */
re_bytecode_t *bc_p, /**< start of the RegExp bytecode */
lit_utf8_iterator_t *iter_p) /**< input string iterator */
ecma_regexp_exec_helper (ecma_value_t regexp_value, /**< RegExp object */
ecma_value_t input_string, /**< input string */
bool ignore_global) /**< ignore global flag */
{
ecma_completion_value_t ret_value = ecma_make_empty_completion_value ();

JERRY_ASSERT (ecma_is_value_object (regexp_value));
JERRY_ASSERT (ecma_is_value_string (input_string));

ecma_object_t *regexp_object_p = ecma_get_object_from_value (regexp_value);

JERRY_ASSERT (ecma_object_get_class_name (regexp_object_p) == LIT_MAGIC_STRING_REGEXP_UL);

ecma_property_t *bytecode_prop_p = ecma_get_internal_property (regexp_object_p,
ECMA_INTERNAL_PROPERTY_REGEXP_BYTECODE);
re_bytecode_t *bc_p = ECMA_GET_POINTER (re_bytecode_t, bytecode_prop_p->u.internal_property.value);

ecma_string_t *input_string_p = ecma_get_string_from_value (input_string);
lit_utf8_size_t input_string_size = ecma_string_get_size (input_string_p);

MEM_DEFINE_LOCAL_ARRAY (input_utf8_buffer_p, input_string_size, lit_utf8_byte_t);

ecma_string_to_utf8_string (input_string_p, input_utf8_buffer_p, (ssize_t) input_string_size);
lit_utf8_iterator_t iterator = lit_utf8_iterator_create (input_utf8_buffer_p, input_string_size);

re_matcher_ctx_t re_ctx;
re_ctx.input_start_p = iter_p->buf_p;
re_ctx.input_end_p = iter_p->buf_p + iter_p->buf_size;
re_ctx.input_start_p = iterator.buf_p;
re_ctx.input_end_p = iterator.buf_p + iterator.buf_size;
re_ctx.match_limit = 0;
re_ctx.recursion_depth = 0;

/* 1. Read bytecode header and init regexp matcher context. */
re_ctx.flags = (uint8_t) re_get_value (&bc_p);

if (ignore_global)
{
re_ctx.flags &= (uint8_t) ~RE_FLAG_GLOBAL;
}

JERRY_DDLOG ("Exec with flags [global: %d, ignoreCase: %d, multiline: %d]\n",
re_ctx.flags & RE_FLAG_GLOBAL,
re_ctx.flags & RE_FLAG_IGNORE_CASE,
Expand Down Expand Up @@ -1217,22 +1243,22 @@ ecma_regexp_exec_helper (ecma_object_t *obj_p, /**< RegExp object */
bool is_match = false;
re_ctx.num_of_iterations_p = num_of_iter_p;
int32_t index = 0;
ecma_length_t input_str_len = lit_utf8_string_length (iter_p->buf_p, iter_p->buf_size);
ecma_length_t input_str_len = lit_utf8_string_length (iterator.buf_p, iterator.buf_size);

if (iter_p->buf_p && re_ctx.flags & RE_FLAG_GLOBAL)
if (iterator.buf_p && (re_ctx.flags & RE_FLAG_GLOBAL))
{
ecma_string_t *magic_str_p = ecma_get_magic_string (LIT_MAGIC_STRING_LASTINDEX_UL);
ecma_property_t *lastindex_prop_p = ecma_op_object_get_property (obj_p, magic_str_p);
ecma_property_t *lastindex_prop_p = ecma_op_object_get_property (regexp_object_p, magic_str_p);

ECMA_OP_TO_NUMBER_TRY_CATCH (lastindex_num, lastindex_prop_p->u.named_data_property.value, ret_value)
index = ecma_number_to_int32 (lastindex_num);

JERRY_ASSERT (iter_p->buf_pos.offset == 0 && !iter_p->buf_pos.is_non_bmp_middle);
if (!lit_utf8_iterator_is_eos (iter_p)
JERRY_ASSERT (iterator.buf_pos.offset == 0 && !iterator.buf_pos.is_non_bmp_middle);
if (!lit_utf8_iterator_is_eos (&iterator)
&& index <= (int32_t) input_str_len
&& index > 0)
{
lit_utf8_iterator_advance (iter_p, (ecma_length_t) index);
lit_utf8_iterator_advance (&iterator, (ecma_length_t) index);
}
ECMA_OP_TO_NUMBER_FINALIZE (lastindex_num);
ecma_deref_ecma_string (magic_str_p);
Expand All @@ -1245,42 +1271,45 @@ ecma_regexp_exec_helper (ecma_object_t *obj_p, /**< RegExp object */
{
if (index < 0 || index > (int32_t) input_str_len)
{
ecma_string_t *magic_str_p = ecma_get_magic_string (LIT_MAGIC_STRING_LASTINDEX_UL);
ecma_number_t *lastindex_num_p = ecma_alloc_number ();
*lastindex_num_p = ECMA_NUMBER_ZERO;
ecma_op_object_put (obj_p, magic_str_p, ecma_make_number_value (lastindex_num_p), true);
ecma_dealloc_number (lastindex_num_p);
ecma_deref_ecma_string (magic_str_p);
if (re_ctx.flags & RE_FLAG_GLOBAL)
{
ecma_string_t *magic_str_p = ecma_get_magic_string (LIT_MAGIC_STRING_LASTINDEX_UL);
ecma_number_t *lastindex_num_p = ecma_alloc_number ();
*lastindex_num_p = ECMA_NUMBER_ZERO;
ecma_op_object_put (regexp_object_p, magic_str_p, ecma_make_number_value (lastindex_num_p), true);
ecma_dealloc_number (lastindex_num_p);
ecma_deref_ecma_string (magic_str_p);
}

is_match = false;
break;
}
else
{
ECMA_TRY_CATCH (match_value, re_match_regexp (&re_ctx, bc_p, *iter_p, &sub_iter), ret_value);
ECMA_TRY_CATCH (match_value, re_match_regexp (&re_ctx, bc_p, iterator, &sub_iter), ret_value);

if (ecma_is_value_true (match_value))
{
is_match = true;
break;
}

if (!lit_utf8_iterator_is_eos (iter_p))
if (!lit_utf8_iterator_is_eos (&iterator))
{
lit_utf8_iterator_advance (iter_p, 1);
lit_utf8_iterator_advance (&iterator, 1);
}
index++;

ECMA_FINALIZE (match_value);
}
}

if (iter_p->buf_p && re_ctx.flags & RE_FLAG_GLOBAL)
if (iterator.buf_p && (re_ctx.flags & RE_FLAG_GLOBAL))
{
ecma_string_t *magic_str_p = ecma_get_magic_string (LIT_MAGIC_STRING_LASTINDEX_UL);
ecma_number_t *lastindex_num_p = ecma_alloc_number ();
*lastindex_num_p = sub_iter.buf_pos.offset;
ecma_op_object_put (obj_p, magic_str_p, ecma_make_number_value (lastindex_num_p), true);
ecma_op_object_put (regexp_object_p, magic_str_p, ecma_make_number_value (lastindex_num_p), true);
ecma_dealloc_number (lastindex_num_p);
ecma_deref_ecma_string (magic_str_p);
}
Expand All @@ -1299,9 +1328,9 @@ ecma_regexp_exec_helper (ecma_object_t *obj_p, /**< RegExp object */
{
ecma_string_t *index_str_p = ecma_new_ecma_string_from_uint32 (i / 2);

/* Note: 'iter_p->buf_p == NULL' means the input is empty string */
/* Note: 'iterator.buf_p == NULL' means the input is empty string */
if (((re_ctx.saved_p[i].buf_p && re_ctx.saved_p[i + 1].buf_p)
|| (!iter_p->buf_p && !re_ctx.saved_p[i].buf_p && !re_ctx.saved_p[i + 1].buf_p))
|| (!iterator.buf_p && !re_ctx.saved_p[i].buf_p && !re_ctx.saved_p[i + 1].buf_p))
&& re_ctx.saved_p[i + 1].buf_pos.offset >= re_ctx.saved_p[i].buf_pos.offset)
{
ecma_length_t capture_str_len;
Expand Down Expand Up @@ -1336,8 +1365,10 @@ ecma_regexp_exec_helper (ecma_object_t *obj_p, /**< RegExp object */
ret_value = ecma_make_normal_completion_value (ecma_make_simple_value (ECMA_SIMPLE_VALUE_NULL));
}
}

MEM_FINALIZE_LOCAL_ARRAY (num_of_iter_p);
MEM_FINALIZE_LOCAL_ARRAY (saved_p);
MEM_FINALIZE_LOCAL_ARRAY (input_utf8_buffer_p);

return ret_value;
} /* ecma_regexp_exec_helper */
Expand Down
4 changes: 1 addition & 3 deletions jerry-core/ecma/operations/ecma-regexp-object.h
Expand Up @@ -59,9 +59,7 @@ extern ecma_completion_value_t
ecma_op_create_regexp_object (ecma_string_t *pattern_p, ecma_string_t *flags_str_p);

/*
 * Declaration of the RegExp exec helper defined in ecma-regexp-object.cpp.
 *
 * In the definition the three parameters of the (ecma_value_t, ecma_value_t, bool)
 * form are named, in order:
 *   regexp_value  - the RegExp object
 *   input_string  - the input string
 *   ignore_global - when true, RE_FLAG_GLOBAL is cleared from the matcher flags
 *
 * NOTE(review): this listing looks like a rendered diff; the parameter list taking
 * (obj_p, bc_p, iter_p) appears to be the removed prototype and the
 * (ecma_value_t, ecma_value_t, bool) line its replacement -- confirm against the
 * merged header.
 */
extern ecma_completion_value_t
ecma_regexp_exec_helper (ecma_object_t *obj_p,
re_bytecode_t *bc_p,
lit_utf8_iterator_t *iter_p);
ecma_regexp_exec_helper (ecma_value_t, ecma_value_t, bool);

/**
* @}
Expand Down