Skip to content

Commit

Permalink
Implement String.prototype.search, and some minor regexp refactors.
Browse files Browse the repository at this point in the history
JerryScript-DCO-1.0-Signed-off-by: Zoltan Herczeg zherczeg@inf.u-szeged.hu
  • Loading branch information
zherczeg committed Aug 4, 2015
1 parent f39a294 commit 0a1b6eb
Show file tree
Hide file tree
Showing 6 changed files with 209 additions and 65 deletions.
Expand Up @@ -67,30 +67,13 @@ ecma_builtin_regexp_prototype_exec (ecma_value_t this_arg, /**< this argument */
{
ECMA_TRY_CATCH (obj_this, ecma_op_to_object (this_arg), ret_value);

ecma_object_t *obj_p = ecma_get_object_from_value (obj_this);
ecma_property_t *bytecode_prop_p = ecma_get_internal_property (obj_p, ECMA_INTERNAL_PROPERTY_REGEXP_BYTECODE);
re_bytecode_t *bytecode_p = ECMA_GET_POINTER (re_bytecode_t, bytecode_prop_p->u.internal_property.value);

ECMA_TRY_CATCH (input_str_value,
ecma_op_to_string (arg),
ret_value);

ecma_string_t *input_str_p = ecma_get_string_from_value (input_str_value);

/* Convert ecma_String_t *to regexp_bytecode_t* */
lit_utf8_size_t input_str_size = ecma_string_get_size (input_str_p);

MEM_DEFINE_LOCAL_ARRAY (input_utf8_buffer_p, input_str_size, lit_utf8_byte_t);

ecma_string_to_utf8_string (input_str_p, input_utf8_buffer_p, (ssize_t) input_str_size);
lit_utf8_iterator_t iter = lit_utf8_iterator_create (input_utf8_buffer_p, input_str_size);

ret_value = ecma_regexp_exec_helper (obj_p, bytecode_p, &iter);

MEM_FINALIZE_LOCAL_ARRAY (input_utf8_buffer_p);
ret_value = ecma_regexp_exec_helper (obj_this, input_str_value, false);

ECMA_FINALIZE (input_str_value);

ECMA_FINALIZE (obj_this);
}

Expand Down
110 changes: 91 additions & 19 deletions jerry-core/ecma/builtin-objects/ecma-builtin-string-prototype.cpp
Expand Up @@ -31,6 +31,10 @@
#include "jrt-libc-includes.h"
#include "lit-char-helpers.h"

#ifndef CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN
#include "ecma-regexp-object.h"
#endif /* !CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */

#ifndef CONFIG_ECMA_COMPACT_PROFILE_DISABLE_STRING_BUILTIN

#define ECMA_BUILTINS_INTERNAL
Expand Down Expand Up @@ -560,15 +564,10 @@ ecma_builtin_string_prototype_object_match (ecma_value_t this_arg, /**< this arg

JERRY_ASSERT (ecma_is_value_boolean (global_value));

ecma_value_t exec_arguments[1] = { this_to_string_value };

if (!ecma_is_value_true (global_value))
{
/* 7. */
ret_value = ecma_builtin_regexp_prototype_dispatch_routine (LIT_MAGIC_STRING_EXEC,
regexp_value,
exec_arguments,
1);
ret_value = ecma_regexp_exec_helper (regexp_value, this_to_string_value, false);
}
else
{
Expand Down Expand Up @@ -608,10 +607,7 @@ ecma_builtin_string_prototype_object_match (ecma_value_t this_arg, /**< this arg
{
/* 8.f.i. */
ECMA_TRY_CATCH (exec_value,
ecma_builtin_regexp_prototype_dispatch_routine (LIT_MAGIC_STRING_EXEC,
regexp_value,
exec_arguments,
1),
ecma_regexp_exec_helper (regexp_value, this_to_string_value, false),
ret_value);

if (ecma_is_value_null (exec_value))
Expand Down Expand Up @@ -829,13 +825,10 @@ ecma_builtin_string_prototype_object_replace_match (ecma_builtin_replace_search_

if (context_p->is_regexp)
{
ecma_value_t exec_arguments[1] = { context_p->input_string };

ECMA_TRY_CATCH (match_value,
ecma_builtin_regexp_prototype_dispatch_routine (LIT_MAGIC_STRING_EXEC,
context_p->regexp_or_search_string,
exec_arguments,
1),
ecma_regexp_exec_helper (context_p->regexp_or_search_string,
context_p->input_string,
false),
ret_value);

if (!ecma_is_value_null (match_value))
Expand Down Expand Up @@ -1504,7 +1497,6 @@ ecma_builtin_string_prototype_object_replace (ecma_value_t this_arg, /**< this a

return ret_value;
} /* ecma_builtin_string_prototype_object_replace */
#endif /* !CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */

/**
* The String.prototype object's 'search' routine
Expand All @@ -1517,11 +1509,91 @@ ecma_builtin_string_prototype_object_replace (ecma_value_t this_arg, /**< this a
*/
/*
 * Overview of the routine below: this_arg is checked for coercibility and
 * converted to a string; a RegExp value is taken from regexp_arg (copied as-is
 * when it is already an object with the RegExp class name, otherwise obtained
 * from ecma_builtin_regexp_dispatch_construct); the match is run through
 * ecma_regexp_exec_helper with its ignore_global argument set to true; the
 * result is a number: the value of the match object's LIT_MAGIC_STRING_INDEX
 * property, or -1 when the helper returned null.
 *
 * NOTE(review): ECMA_TRY_CATCH / ECMA_FINALIZE are defined elsewhere. The flow
 * here appears to rely on them storing a failed operation's completion in
 * ret_value and skipping the guarded region - confirm against the macros.
 */
static ecma_completion_value_t
ecma_builtin_string_prototype_object_search (ecma_value_t this_arg, /**< this argument */
ecma_value_t arg) /**< routine's argument */
ecma_value_t regexp_arg) /**< routine's argument */
{
ECMA_BUILTIN_CP_UNIMPLEMENTED (this_arg, arg);
ecma_completion_value_t ret_value = ecma_make_empty_completion_value ();

/* 1. this_arg must pass the object-coercible check. */
ECMA_TRY_CATCH (check_coercible_value,
ecma_op_check_object_coercible (this_arg),
ret_value);

/* 2. Convert this_arg to the string that will be searched. */
ECMA_TRY_CATCH (to_string_value,
ecma_op_to_string (this_arg),
ret_value);

/* Placeholder; replaced by a copied value in step 3 or step 4. */
ecma_value_t regexp_value = ecma_make_simple_value (ECMA_SIMPLE_VALUE_EMPTY);

/* 3. regexp_arg is already an object with the RegExp class name: copy it. */
if (ecma_is_value_object (regexp_arg)
&& ecma_object_get_class_name (ecma_get_object_from_value (regexp_arg)) == LIT_MAGIC_STRING_REGEXP_UL)
{
regexp_value = ecma_copy_value (regexp_arg, true);
}
else
{
/* 4. Otherwise pass regexp_arg as the single argument of the RegExp construct routine. */
ecma_value_t regexp_arguments[1] = { regexp_arg };

ECMA_TRY_CATCH (new_regexp_value,
ecma_builtin_regexp_dispatch_construct (regexp_arguments, 1),
ret_value);

/* Keep our own copy, since new_regexp_value is finalized right below. */
regexp_value = ecma_copy_value (new_regexp_value, true);

ECMA_FINALIZE (new_regexp_value);
}

/* 5. Run the match only if no earlier step left a completion in ret_value. */
if (ecma_is_completion_value_empty (ret_value))
{
/* The third argument (ignore_global) is true for search. */
ECMA_TRY_CATCH (match_result,
ecma_regexp_exec_helper (regexp_value, to_string_value, true),
ret_value);

/* Result reported when the helper returns null (no match). */
ecma_number_t offset = -1;

if (!ecma_is_value_null (match_result))
{
JERRY_ASSERT (ecma_is_value_object (match_result));

ecma_object_t *match_object_p = ecma_get_object_from_value (match_result);
ecma_string_t *index_string_p = ecma_get_magic_string (LIT_MAGIC_STRING_INDEX);

/* Read the LIT_MAGIC_STRING_INDEX property of the match object. */
ECMA_TRY_CATCH (index_value,
ecma_op_object_get (match_object_p, index_string_p),
ret_value);

JERRY_ASSERT (ecma_is_value_number (index_value));

offset = *ecma_get_number_from_value (index_value);

ECMA_FINALIZE (index_value);
/* Drop the reference obtained from ecma_get_magic_string above. */
ecma_deref_ecma_string (index_string_p);
}

/* Build the number result unless reading the index property set ret_value. */
if (ecma_is_completion_value_empty (ret_value))
{
ecma_number_t *offset_number_p = ecma_alloc_number ();
*offset_number_p = offset;

ret_value = ecma_make_normal_completion_value (ecma_make_number_value (offset_number_p));
}

ECMA_FINALIZE (match_result);
/* Release the copy taken in step 3 or step 4. */
ecma_free_value (regexp_value, true);
}

ECMA_FINALIZE (to_string_value);
ECMA_FINALIZE (check_coercible_value);

/* 6. */
return ret_value;
} /* ecma_builtin_string_prototype_object_search */

#endif /* !CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */

/**
* The String.prototype object's 'slice' routine
*
Expand Down
Expand Up @@ -71,9 +71,9 @@ ROUTINE (LIT_MAGIC_STRING_LOCALE_COMPARE_UL, ecma_builtin_string_prototype_objec
#ifndef CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN
ROUTINE (LIT_MAGIC_STRING_MATCH, ecma_builtin_string_prototype_object_match, 1, 1)
ROUTINE (LIT_MAGIC_STRING_REPLACE, ecma_builtin_string_prototype_object_replace, 2, 2)
ROUTINE (LIT_MAGIC_STRING_SEARCH, ecma_builtin_string_prototype_object_search, 1, 1)
#endif /* CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */

ROUTINE (LIT_MAGIC_STRING_SEARCH, ecma_builtin_string_prototype_object_search, 1, 1)
ROUTINE (LIT_MAGIC_STRING_SPLIT, ecma_builtin_string_prototype_object_split, 2, 2)
ROUTINE (LIT_MAGIC_STRING_SUBSTRING, ecma_builtin_string_prototype_object_substring, 2, 2)
ROUTINE (LIT_MAGIC_STRING_TO_LOWER_CASE_UL, ecma_builtin_string_prototype_object_to_lower_case, 0, 0)
Expand Down
79 changes: 55 additions & 24 deletions jerry-core/ecma/operations/ecma-regexp-object.cpp
Expand Up @@ -1176,19 +1176,45 @@ re_set_result_array_properties (ecma_object_t *array_obj_p, /**< result array */
* Returned value must be freed with ecma_free_completion_value
*/
ecma_completion_value_t
ecma_regexp_exec_helper (ecma_object_t *obj_p, /**< RegExp object */
re_bytecode_t *bc_p, /**< start of the RegExp bytecode */
lit_utf8_iterator_t *iter_p) /**< input string iterator */
ecma_regexp_exec_helper (ecma_value_t regexp_value, /**< RegExp object */
ecma_value_t input_string, /**< input string */
bool ignore_global) /**< ignore global flag */
{
ecma_completion_value_t ret_value = ecma_make_empty_completion_value ();

JERRY_ASSERT (ecma_is_value_object (regexp_value));
JERRY_ASSERT (ecma_is_value_string (input_string));

ecma_object_t *regexp_object_p = ecma_get_object_from_value (regexp_value);

JERRY_ASSERT (ecma_object_get_class_name (regexp_object_p) == LIT_MAGIC_STRING_REGEXP_UL);

ecma_property_t *bytecode_prop_p = ecma_get_internal_property (regexp_object_p,
ECMA_INTERNAL_PROPERTY_REGEXP_BYTECODE);
re_bytecode_t *bc_p = ECMA_GET_POINTER (re_bytecode_t, bytecode_prop_p->u.internal_property.value);

ecma_string_t *input_string_p = ecma_get_string_from_value (input_string);
lit_utf8_size_t input_string_size = ecma_string_get_size (input_string_p);

MEM_DEFINE_LOCAL_ARRAY (input_utf8_buffer_p, input_string_size, lit_utf8_byte_t);

ecma_string_to_utf8_string (input_string_p, input_utf8_buffer_p, (ssize_t) input_string_size);
lit_utf8_iterator_t iterator = lit_utf8_iterator_create (input_utf8_buffer_p, input_string_size);

re_matcher_ctx_t re_ctx;
re_ctx.input_start_p = iter_p->buf_p;
re_ctx.input_end_p = iter_p->buf_p + iter_p->buf_size;
re_ctx.input_start_p = iterator.buf_p;
re_ctx.input_end_p = iterator.buf_p + iterator.buf_size;
re_ctx.match_limit = 0;
re_ctx.recursion_depth = 0;

/* 1. Read bytecode header and init regexp matcher context. */
re_ctx.flags = (uint8_t) re_get_value (&bc_p);

if (ignore_global)
{
re_ctx.flags &= (uint8_t) ~RE_FLAG_GLOBAL;
}

JERRY_DDLOG ("Exec with flags [global: %d, ignoreCase: %d, multiline: %d]\n",
re_ctx.flags & RE_FLAG_GLOBAL,
re_ctx.flags & RE_FLAG_IGNORE_CASE,
Expand Down Expand Up @@ -1217,22 +1243,22 @@ ecma_regexp_exec_helper (ecma_object_t *obj_p, /**< RegExp object */
bool is_match = false;
re_ctx.num_of_iterations_p = num_of_iter_p;
int32_t index = 0;
ecma_length_t input_str_len = lit_utf8_string_length (iter_p->buf_p, iter_p->buf_size);
ecma_length_t input_str_len = lit_utf8_string_length (iterator.buf_p, iterator.buf_size);

if (iter_p->buf_p && re_ctx.flags & RE_FLAG_GLOBAL)
if (iterator.buf_p && (re_ctx.flags & RE_FLAG_GLOBAL))
{
ecma_string_t *magic_str_p = ecma_get_magic_string (LIT_MAGIC_STRING_LASTINDEX_UL);
ecma_property_t *lastindex_prop_p = ecma_op_object_get_property (obj_p, magic_str_p);
ecma_property_t *lastindex_prop_p = ecma_op_object_get_property (regexp_object_p, magic_str_p);

ECMA_OP_TO_NUMBER_TRY_CATCH (lastindex_num, lastindex_prop_p->u.named_data_property.value, ret_value)
index = ecma_number_to_int32 (lastindex_num);

JERRY_ASSERT (iter_p->buf_pos.offset == 0 && !iter_p->buf_pos.is_non_bmp_middle);
if (!lit_utf8_iterator_is_eos (iter_p)
JERRY_ASSERT (iterator.buf_pos.offset == 0 && !iterator.buf_pos.is_non_bmp_middle);
if (!lit_utf8_iterator_is_eos (&iterator)
&& index <= (int32_t) input_str_len
&& index > 0)
{
lit_utf8_iterator_advance (iter_p, (ecma_length_t) index);
lit_utf8_iterator_advance (&iterator, (ecma_length_t) index);
}
ECMA_OP_TO_NUMBER_FINALIZE (lastindex_num);
ecma_deref_ecma_string (magic_str_p);
Expand All @@ -1245,42 +1271,45 @@ ecma_regexp_exec_helper (ecma_object_t *obj_p, /**< RegExp object */
{
if (index < 0 || index > (int32_t) input_str_len)
{
ecma_string_t *magic_str_p = ecma_get_magic_string (LIT_MAGIC_STRING_LASTINDEX_UL);
ecma_number_t *lastindex_num_p = ecma_alloc_number ();
*lastindex_num_p = ECMA_NUMBER_ZERO;
ecma_op_object_put (obj_p, magic_str_p, ecma_make_number_value (lastindex_num_p), true);
ecma_dealloc_number (lastindex_num_p);
ecma_deref_ecma_string (magic_str_p);
if (re_ctx.flags & RE_FLAG_GLOBAL)
{
ecma_string_t *magic_str_p = ecma_get_magic_string (LIT_MAGIC_STRING_LASTINDEX_UL);
ecma_number_t *lastindex_num_p = ecma_alloc_number ();
*lastindex_num_p = ECMA_NUMBER_ZERO;
ecma_op_object_put (regexp_object_p, magic_str_p, ecma_make_number_value (lastindex_num_p), true);
ecma_dealloc_number (lastindex_num_p);
ecma_deref_ecma_string (magic_str_p);
}

is_match = false;
break;
}
else
{
ECMA_TRY_CATCH (match_value, re_match_regexp (&re_ctx, bc_p, *iter_p, &sub_iter), ret_value);
ECMA_TRY_CATCH (match_value, re_match_regexp (&re_ctx, bc_p, iterator, &sub_iter), ret_value);

if (ecma_is_value_true (match_value))
{
is_match = true;
break;
}

if (!lit_utf8_iterator_is_eos (iter_p))
if (!lit_utf8_iterator_is_eos (&iterator))
{
lit_utf8_iterator_advance (iter_p, 1);
lit_utf8_iterator_advance (&iterator, 1);
}
index++;

ECMA_FINALIZE (match_value);
}
}

if (iter_p->buf_p && re_ctx.flags & RE_FLAG_GLOBAL)
if (iterator.buf_p && (re_ctx.flags & RE_FLAG_GLOBAL))
{
ecma_string_t *magic_str_p = ecma_get_magic_string (LIT_MAGIC_STRING_LASTINDEX_UL);
ecma_number_t *lastindex_num_p = ecma_alloc_number ();
*lastindex_num_p = sub_iter.buf_pos.offset;
ecma_op_object_put (obj_p, magic_str_p, ecma_make_number_value (lastindex_num_p), true);
ecma_op_object_put (regexp_object_p, magic_str_p, ecma_make_number_value (lastindex_num_p), true);
ecma_dealloc_number (lastindex_num_p);
ecma_deref_ecma_string (magic_str_p);
}
Expand All @@ -1299,9 +1328,9 @@ ecma_regexp_exec_helper (ecma_object_t *obj_p, /**< RegExp object */
{
ecma_string_t *index_str_p = ecma_new_ecma_string_from_uint32 (i / 2);

/* Note: 'iter_p->buf_p == NULL' means the input is empty string */
/* Note: 'iterator.buf_p == NULL' means the input is empty string */
if (((re_ctx.saved_p[i].buf_p && re_ctx.saved_p[i + 1].buf_p)
|| (!iter_p->buf_p && !re_ctx.saved_p[i].buf_p && !re_ctx.saved_p[i + 1].buf_p))
|| (!iterator.buf_p && !re_ctx.saved_p[i].buf_p && !re_ctx.saved_p[i + 1].buf_p))
&& re_ctx.saved_p[i + 1].buf_pos.offset >= re_ctx.saved_p[i].buf_pos.offset)
{
ecma_length_t capture_str_len;
Expand Down Expand Up @@ -1336,8 +1365,10 @@ ecma_regexp_exec_helper (ecma_object_t *obj_p, /**< RegExp object */
ret_value = ecma_make_normal_completion_value (ecma_make_simple_value (ECMA_SIMPLE_VALUE_NULL));
}
}

MEM_FINALIZE_LOCAL_ARRAY (num_of_iter_p);
MEM_FINALIZE_LOCAL_ARRAY (saved_p);
MEM_FINALIZE_LOCAL_ARRAY (input_utf8_buffer_p);

return ret_value;
} /* ecma_regexp_exec_helper */
Expand Down
4 changes: 1 addition & 3 deletions jerry-core/ecma/operations/ecma-regexp-object.h
Expand Up @@ -59,9 +59,7 @@ extern ecma_completion_value_t
ecma_op_create_regexp_object (ecma_string_t *pattern_p, ecma_string_t *flags_str_p);

extern ecma_completion_value_t
ecma_regexp_exec_helper (ecma_object_t *obj_p,
re_bytecode_t *bc_p,
lit_utf8_iterator_t *iter_p);
ecma_regexp_exec_helper (ecma_value_t, ecma_value_t, bool);

/**
* @}
Expand Down

0 comments on commit 0a1b6eb

Please sign in to comment.